[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/4] x86emul: move stubs off the stack



On 18/05/15 13:46, Jan Beulich wrote:
> This is needed as stacks are going to become non-executable.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> --- a/tools/tests/x86_emulator/x86_emulate.c
> +++ b/tools/tests/x86_emulator/x86_emulate.c
> @@ -17,4 +17,8 @@ typedef bool bool_t;
>  #define __packed __attribute__((packed))
>  
>  #include "x86_emulate/x86_emulate.h"
> +
> +#define get_stub(stb) ((void *)((stb).addr = (uintptr_t)(stb).buf))
> +#define put_stub(stb)
> +
>  #include "x86_emulate/x86_emulate.c"
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -9,6 +9,7 @@
>   *    Keir Fraser <keir@xxxxxxx>
>   */
>  
> +#include <xen/domain_page.h>
>  #include <asm/x86_emulate.h>
>  #include <asm/asm_defns.h> /* mark_regs_dirty() */
>  #include <asm/processor.h> /* current_cpu_info */
> @@ -17,8 +18,22 @@
>  /* Avoid namespace pollution. */
>  #undef cmpxchg
>  #undef cpuid
> +#undef wbinvd
>  
>  #define cpu_has_amd_erratum(nr) \
>          cpu_has_amd_erratum(&current_cpu_data, AMD_ERRATUM_##nr)
>  
> +#define get_stub(stb) ({                                   \
> +    (stb).addr = this_cpu(stubs.addr) + STUB_BUF_SIZE / 2; \
> +    ((stb).ptr = map_domain_page(this_cpu(stubs.mfn))) +   \
> +        ((stb).addr & (PAGE_SIZE - 1));                    \
> +})
> +#define put_stub(stb) ({                                   \
> +    if ( (stb).ptr )                                       \
> +    {                                                      \
> +        unmap_domain_page((stb).ptr);                      \
> +        (stb).ptr = NULL;                                  \
> +    }                                                      \
> +})
> +

These don't need to be macros, and I suspect a compiler would choose to
out-of-line get_stub() if it could.

>  #include "x86_emulate/x86_emulate.c"
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -717,11 +717,14 @@ do{ struct fpu_insn_ctxt fic;           
>  } while (0)
>  
>  #define emulate_fpu_insn_stub(_bytes...)                                \
> -do{ uint8_t stub[] = { _bytes, 0xc3 };                                  \
> -    struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };        \
> +do{ uint8_t *buf = get_stub(stub);                                      \
> +    unsigned int _nr = sizeof((uint8_t[]){ _bytes });                   \
> +    struct fpu_insn_ctxt fic = { .insn_bytes = _nr };                    \

Alignment of backslashes.

> +    memcpy(buf, ((uint8_t[]){ _bytes, 0xc3 }), _nr + 1);                \
>      get_fpu(X86EMUL_FPU_fpu, &fic);                                     \
> -    (*(void(*)(void))stub)();                                           \
> +    stub.func();                                                        \
>      put_fpu(&fic);                                                      \
> +    put_stub(stub);                                                     \
>  } while (0)
>  
>  static unsigned long _get_rep_prefix(
> @@ -1458,6 +1461,7 @@ x86_emulate(
>      struct operand src = { .reg = REG_POISON };
>      struct operand dst = { .reg = REG_POISON };
>      enum x86_swint_type swint_type;
> +    struct x86_emulate_stub stub = {};
>      DECLARE_ALIGNED(mmval_t, mmval);
>      /*
>       * Data operand effective address (usually computed from ModRM).
> @@ -3792,6 +3796,7 @@ x86_emulate(
>  
>   done:
>      _put_fpu();
> +    put_stub(stub);
>      return rc;
>  
>   twobyte_insn:
> @@ -4007,9 +4012,15 @@ x86_emulate(
>                 /* {,v}movss xmm,xmm/m32 */
>                 /* {,v}movsd xmm,xmm/m64 */
>      {
> -        uint8_t stub[] = { 0x3e, 0x3e, 0x0f, b, modrm, 0xc3 };
> -        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
> +        uint8_t *buf = get_stub(stub);
> +        struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
>  
> +        buf[0] = 0x3e;
> +        buf[1] = 0x3e;
> +        buf[2] = 0x0f;
> +        buf[3] = b;
> +        buf[4] = modrm;
> +        buf[5] = 0xc3;
>          if ( vex.opcx == vex_none )
>          {
>              if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
> @@ -4017,7 +4028,7 @@ x86_emulate(
>              else
>                  vcpu_must_have_sse();
>              ea.bytes = 16;
> -            SET_SSE_PREFIX(stub[0], vex.pfx);
> +            SET_SSE_PREFIX(buf[0], vex.pfx);
>              get_fpu(X86EMUL_FPU_xmm, &fic);
>          }
>          else
> @@ -4044,15 +4055,16 @@ x86_emulate(
>              /* convert memory operand to (%rAX) */
>              rex_prefix &= ~REX_B;
>              vex.b = 1;
> -            stub[4] &= 0x38;
> +            buf[4] &= 0x38;
>          }
>          if ( !rc )
>          {
> -           copy_REX_VEX(stub, rex_prefix, vex);
> -           asm volatile ( "call *%0" : : "r" (stub), "a" (mmvalp)
> +           copy_REX_VEX(buf, rex_prefix, vex);
> +           asm volatile ( "call *%0" : : "r" (stub.func), "a" (mmvalp)
>                                       : "memory" );
>          }
>          put_fpu(&fic);
> +        put_stub(stub);
>          if ( !rc && (b & 1) && (ea.type == OP_MEM) )
>              rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
>                              ea.bytes, ctxt);
> @@ -4242,9 +4254,15 @@ x86_emulate(
>                 /* {,v}movdq{a,u} xmm,xmm/m128 */
>                 /* vmovdq{a,u} ymm,ymm/m256 */
>      {
> -        uint8_t stub[] = { 0x3e, 0x3e, 0x0f, b, modrm, 0xc3 };
> -        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
> +        uint8_t *buf = get_stub(stub);
> +        struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
>  
> +        buf[0] = 0x3e;
> +        buf[1] = 0x3e;
> +        buf[2] = 0x0f;
> +        buf[3] = b;
> +        buf[4] = modrm;
> +        buf[5] = 0xc3;
>          if ( vex.opcx == vex_none )
>          {
>              switch ( vex.pfx )
> @@ -4252,7 +4270,7 @@ x86_emulate(
>              case vex_66:
>              case vex_f3:
>                  vcpu_must_have_sse2();
> -                stub[0] = 0x66; /* movdqa */
> +                buf[0] = 0x66; /* movdqa */
>                  get_fpu(X86EMUL_FPU_xmm, &fic);
>                  ea.bytes = 16;
>                  break;
> @@ -4288,15 +4306,16 @@ x86_emulate(
>              /* convert memory operand to (%rAX) */
>              rex_prefix &= ~REX_B;
>              vex.b = 1;
> -            stub[4] &= 0x38;
> +            buf[4] &= 0x38;
>          }
>          if ( !rc )
>          {
> -           copy_REX_VEX(stub, rex_prefix, vex);
> -           asm volatile ( "call *%0" : : "r" (stub), "a" (mmvalp)
> +           copy_REX_VEX(buf, rex_prefix, vex);
> +           asm volatile ( "call *%0" : : "r" (stub.func), "a" (mmvalp)
>                                       : "memory" );
>          }
>          put_fpu(&fic);
> +        put_stub(stub);
>          if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) )
>              rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
>                              ea.bytes, ctxt);
> @@ -4638,5 +4657,6 @@ x86_emulate(
>  
>   cannot_emulate:
>      _put_fpu();
> +    put_stub(stub);
>      return X86EMUL_UNHANDLEABLE;
>  }
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -429,6 +429,18 @@ struct x86_emulate_ctxt
>      } retire;
>  };
>  
> +struct x86_emulate_stub {
> +    union {
> +        void (*func)(void);
> +        uintptr_t addr;
> +    };
> +#ifdef __XEN__
> +    void *ptr;
> +#else
> +    uint8_t buf[32];

16 is surely enough? The emulator will #GP if it attempts to fetch more
than 15 bytes, and only a 'ret' is needed after that.

~Andrew

> +#endif
> +};
> +
>  /*
>   * x86_emulate: Emulate an instruction.
>   * Returns -1 on failure, 0 on success.
>
>


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.