[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 5/7] x86/alt: Support for automatic padding calculations
On Mon, Feb 12, 2018 at 11:23:05AM +0000, Andrew Cooper wrote: > The correct amount of padding in an origin patch site can be calculated > automatically, based on the relative lengths of the replacements. > > This requires a bit of trickery to calculate correctly, especially in the > ALTENRATIVE_2 case where a branchless max() calculation in needed. The > calculation is further complicated because GAS's idea of true is -1 rather > than 1, which is why the extra negations are required. > > Additionally, have apply_alternatives() attempt to optimise the padding nops. > > Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> LGTM, just a couple of nits: Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx> > --- > CC: Jan Beulich <JBeulich@xxxxxxxx> > CC: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> > CC: Roger Pau Monné <roger.pau@xxxxxxxxxx> > CC: Wei Liu <wei.liu2@xxxxxxxxxx> > --- > xen/arch/x86/alternative.c | 32 ++++++++++++++++++++++++---- > xen/include/asm-x86/alternative-asm.h | 40 > +++++++++++++++++++++++++++-------- > xen/include/asm-x86/alternative.h | 39 ++++++++++++++++++++++++++-------- > 3 files changed, 89 insertions(+), 22 deletions(-) > > diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c > index f8ddab5..ec87ff4 100644 > --- a/xen/arch/x86/alternative.c > +++ b/xen/arch/x86/alternative.c > @@ -180,13 +180,37 @@ void init_or_livepatch apply_alternatives(const struct > alt_instr *start, > uint8_t *orig = ALT_ORIG_PTR(a); > uint8_t *repl = ALT_REPL_PTR(a); > uint8_t buf[MAX_PATCH_LEN]; > + unsigned int total_len = a->orig_len + a->pad_len; > > - BUG_ON(a->repl_len > a->orig_len); > - BUG_ON(a->orig_len > sizeof(buf)); > + BUG_ON(a->repl_len > total_len); > + BUG_ON(total_len > sizeof(buf)); > BUG_ON(a->cpuid >= NCAPINTS * 32); > > if ( !boot_cpu_has(a->cpuid) ) > + { > + unsigned int i; > + > + /* No replacement to make, but try to optimise any padding. */ > + if ( a->pad_len <= 1 ) > + continue; > + > + /* Search the padding area for any byte which isn't a nop. */ > + for ( i = a->orig_len; i < total_len; ++i ) > + if ( orig[i] != 0x90 ) Maybe better to compare against ASM_NOP1? > + break; > + > + /* > + * Only make any changes if all padding bytes are unoptimised > + * nops. With multiple alternatives over the same origin site, > we > + * may have already made a replacement, or optimised the nops. > + */ > + if ( i != total_len ) > + continue; > + > + add_nops(buf, a->pad_len); > + text_poke(orig + a->orig_len, buf, a->pad_len); > continue; > + } > > memcpy(buf, repl, a->repl_len); > > @@ -194,8 +218,8 @@ void init_or_livepatch apply_alternatives(const struct > alt_instr *start, > if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 ) > *(s32 *)(buf + 1) += repl - orig; > > - add_nops(buf + a->repl_len, a->orig_len - a->repl_len); > - text_poke(orig, buf, a->orig_len); > + add_nops(buf + a->repl_len, total_len - a->repl_len); > + text_poke(orig, buf, total_len); > } > } > > diff --git a/xen/include/asm-x86/alternative-asm.h > b/xen/include/asm-x86/alternative-asm.h > index 150bd1a..f7e37cb 100644 > --- a/xen/include/asm-x86/alternative-asm.h > +++ b/xen/include/asm-x86/alternative-asm.h > @@ -9,30 +9,41 @@ > * enough information for the alternatives patching code to patch an > * instruction. See apply_alternatives(). > */ > -.macro altinstruction_entry orig repl feature orig_len repl_len > +.macro altinstruction_entry orig repl feature orig_len repl_len pad_len > .long \orig - . > .long \repl - . > .word \feature > .byte \orig_len > .byte \repl_len > + .byte \pad_len > .endm > > #define orig_len (.L\@_orig_e - .L\@_orig_s) > +#define pad_len (.L\@_orig_p - .L\@_orig_e) > +#define total_len (.L\@_orig_p - .L\@_orig_s) > #define repl_len(nr) (.L\@_repl_e\()nr - .L\@_repl_s\()nr) > #define decl_repl(insn, nr) .L\@_repl_s\()nr: insn; .L\@_repl_e\()nr: > +#define gas_max(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) That seems to work fine at least on newish versions of clang, so I'm not sure the g prefix is required (as_max). > > .macro ALTERNATIVE oldinstr, newinstr, feature > .L\@_orig_s: > \oldinstr > .L\@_orig_e: > + .skip (-((repl_len(1) - orig_len) > 0) * (repl_len(1) - orig_len)), 0x90 > +.L\@_orig_p: > > .pushsection .altinstructions, "a", @progbits > altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature, \ > - orig_len, repl_len(1) > + orig_len, repl_len(1), pad_len > > .section .discard, "a", @progbits > - /* Assembler-time check that \newinstr isn't longer than \oldinstr. */ > - .byte 0xff + repl_len(1) - orig_len > + /* > + * Assembler-time checks: > + * - total_len <= 255 > + * - \newinstr <= total_len > + */ > + .byte total_len > + .byte 0xff + repl_len(1) - total_len > > .section .altinstr_replacement, "ax", @progbits > > @@ -45,18 +56,26 @@ > .L\@_orig_s: > \oldinstr > .L\@_orig_e: > + .skip (-((gas_max(repl_len(1), repl_len(2)) - orig_len) > 0) * \ > + (gas_max(repl_len(1), repl_len(2)) - orig_len)), 0x90 > +.L\@_orig_p: > > .pushsection .altinstructions, "a", @progbits > > altinstruction_entry .L\@_orig_s, .L\@_repl_s1, \feature1, \ > - orig_len, repl_len(1) > + orig_len, repl_len(1), pad_len > altinstruction_entry .L\@_orig_s, .L\@_repl_s2, \feature2, \ > - orig_len, repl_len(2) > + orig_len, repl_len(2), pad_len > > .section .discard, "a", @progbits > - /* Assembler-time check that \newinstr{1,2} aren't longer than > \oldinstr. */ > - .byte 0xff + repl_len(1) - orig_len > - .byte 0xff + repl_len(2) - orig_len > + /* > + * Assembler-time checks: > + * - total_len <= 255 > + * - \newinstr* <= total_len > + */ > + .byte total_len > + .byte 0xff + repl_len(1) - total_len > + .byte 0xff + repl_len(2) - total_len > > .section .altinstr_replacement, "ax", @progbits > > @@ -66,8 +85,11 @@ > .popsection > .endm > > +#undef gas_max > #undef decl_repl > #undef repl_len > +#undef total_len > +#undef pad_len > #undef orig_len > > #endif /* __ASSEMBLY__ */ > diff --git a/xen/include/asm-x86/alternative.h > b/xen/include/asm-x86/alternative.h > index 1e5cfbd..20dea22 100644 > --- a/xen/include/asm-x86/alternative.h > +++ b/xen/include/asm-x86/alternative.h > @@ -8,12 +8,13 @@ > #include <xen/stringify.h> > #include <xen/types.h> > > -struct alt_instr { > +struct __packed alt_instr { > int32_t orig_offset; /* original instruction */ > int32_t repl_offset; /* offset to replacement instruction */ > uint16_t cpuid; /* cpuid bit set for replacement */ > uint8_t orig_len; /* length of original instruction */ > - uint8_t repl_len; /* length of new instruction, <= instrlen */ > + uint8_t repl_len; /* length of new instruction */ > + uint8_t pad_len; /* length of build-time padding */ > }; > > #define __ALT_PTR(a,f) ((u8 *)((void *)&(a)->f + (a)->f)) > @@ -26,44 +27,64 @@ extern void apply_alternatives(const struct alt_instr > *start, > const struct alt_instr *end); > extern void alternative_instructions(void); > > -#define OLDINSTR(oldinstr) ".L%=_orig_s:\n\t" oldinstr > "\n.L%=_orig_e:\n" > - > #define repl_s(num) ".L%=_repl_s"#num > #define repl_e(num) ".L%=_repl_e"#num > > #define alt_orig_len "(.L%=_orig_e - .L%=_orig_s)" > +#define alt_pad_len "(.L%=_orig_p - .L%=_orig_e)" > +#define alt_total_len "(.L%=_orig_p - .L%=_orig_s)" > #define alt_repl_len(num) "(" repl_e(num) " - " repl_s(num) ")" > +#define gas_max(a, b) \ > + "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" > + > +#define OLDINSTR_1(oldinstr, n1) \ > + ".L%=_orig_s:\n\t" oldinstr "\n .L%=_orig_e:\n\t" \ > + ".skip (-(("alt_repl_len(n1)"-"alt_orig_len") > 0) * " \ > + "("alt_repl_len(n1)"-"alt_orig_len")), 0x90\n\t" \ ASM_NOP1 instead of 0x90? Thanks, Roger. _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |