On 10/02/16 12:57, Jan Beulich wrote:
Also drop an unnecessary va adjustment in the code being touched.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -139,10 +139,12 @@ unsigned int flush_area_local(const void
c->x86_clflush_size && c->x86_cache_size && sz &&
((sz >> 10) < c->x86_cache_size) )
{
- va = (const void *)((unsigned long)va & ~(sz - 1));
+ alternative(ASM_NOP3, "sfence", X86_FEATURE_CLFLUSHOPT);
Why separate? This would be better in the lower alternative(), with
one single nop making up the difference in length. That way,
processors without CLFLUSHOPT don't suffer the 1 cycle instruction
decode stall from the redundant rex prefix.
~Andrew
for ( i = 0; i < sz; i += c->x86_clflush_size )
- asm volatile ( "clflush %0"
- : : "m" (((const char *)va)[i]) );
+ alternative_input("rex clflush %0",
+ "data16 clflush %0",
+ X86_FEATURE_CLFLUSHOPT,
+ "m" (((const char *)va)[i]));
}
else
{
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -163,6 +163,7 @@
#define X86_FEATURE_ADX (7*32+19) /* ADCX, ADOX instructions */
#define X86_FEATURE_SMAP (7*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_PCOMMIT (7*32+22) /* PCOMMIT instruction */
+#define X86_FEATURE_CLFLUSHOPT (7*32+23) /* CLFLUSHOPT instruction */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 8 */
#define X86_FEATURE_PKU (8*32+ 3) /* Protection Keys for Userspace */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|