[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v4 4/4] x86: use PDEP for PTE flags insertion when available
This replaces 5 instructions by a single one, further reducing code size, cache, and TLB footprint (in particular on systems supporting BMI2). Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> --- Irrespective of the note regarding a possible alternative route, I think the change here is an improvement until someone would take on investigating whether the proposed dropping of the 24-bit flags representation would be a worthwhile improvement of generated code. --- v3: Move this patch last in series. Re-base. v2: Avoid quoted symbols; use gcc's new V operand modifier instead. Re-base. --- TBD: Also change get_pte_flags() (after having introduced test_pte_flags())? --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -393,6 +393,8 @@ void __init arch_init_memory(void) paddr_t __read_mostly ma_real_mask = ~0UL; unsigned long __read_mostly pfn_real_mask = ~0UL; +const intpte_t pte_flags_mask = ~(PADDR_MASK & PAGE_MASK); + #ifndef CONFIG_INDIRECT_THUNK /* V modifier unavailable? */ /* Conversion between machine address and direct map offset. */ @@ -419,6 +421,11 @@ unsigned long pfn2pdx(unsigned long pfn) return generic_pfn_to_pdx(pfn); } +intpte_t put_pte_flags_v(unsigned int flags) +{ + return put_pte_flags_c(flags); +} + #endif int page_is_ram_type(unsigned long mfn, unsigned long mem_type) --- a/xen/include/asm-x86/x86_64/page.h +++ b/xen/include/asm-x86/x86_64/page.h @@ -205,15 +205,53 @@ typedef l4_pgentry_t root_pgentry_t; /* Extract flags into 24-bit integer, or turn 24-bit flags into a pte mask. */ #ifndef __ASSEMBLY__ +extern const intpte_t pte_flags_mask; +intpte_t __attribute_const__ put_pte_flags_v(unsigned int x); + static inline unsigned int get_pte_flags(intpte_t x) { return ((x >> 40) & ~0xfff) | (x & 0xfff); } -static inline intpte_t put_pte_flags(unsigned int x) +static inline intpte_t put_pte_flags_c(unsigned int x) { return (((intpte_t)x & ~0xfff) << 40) | (x & 0xfff); } + +static always_inline intpte_t put_pte_flags(unsigned int x) +{ + intpte_t pte; + + if ( __builtin_constant_p(x) ) + return put_pte_flags_c(x); + +#ifdef CONFIG_INDIRECT_THUNK /* V modifier available? */ +#define SYMNAME(pfx...) #pfx "put_pte_flags_%V[pte]_%V[flags]" + alternative_io("call " SYMNAME() "\n\t" + LINKONCE_PROLOGUE(SYMNAME) "\n\t" + "mov %[flags], %k[pte]\n\t" + "and $0xfff000, %[flags]\n\t" + "and $0x000fff, %k[pte]\n\t" + "shl $40, %q[flags]\n\t" + "or %q[flags], %[pte]\n\t" + "ret\n\t" + LINKONCE_EPILOGUE(SYMNAME), + "pdep %[mask], %q[flags], %[pte]", X86_FEATURE_BMI2, + ASM_OUTPUT2([pte] "=&r" (pte), [flags] "+r" (x)), + [mask] "m" (pte_flags_mask)); +#undef SYMNAME +#else + alternative_io("call put_pte_flags_v", + /* pdep pte_flags_mask(%rip), %rdi, %rax */ + ".byte 0xc4, 0xe2, 0xc3, 0xf5, 0x05\n\t" + ".long pte_flags_mask - 4 - .", + X86_FEATURE_BMI2, + ASM_OUTPUT2("=a" (pte), "+D" (x)), "m" (pte_flags_mask) + : "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11"); +#endif + + return pte; +} #endif /* _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |