[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 7/7] x86/pv: Rewrite %dr6 handling



On 9/16/23 05:36, Andrew Cooper wrote:
> All #DB exceptions result in an update of %dr6, but this isn't handled
> properly by Xen for any guest type.
> 
> Remove all ad-hoc dr6 handling, leaving it to pv_inject_event() in most cases
> and using the new x86_merge_dr6() helper.
> 
> In do_debug(), swap the dr6 to pending_dbg in order to operate entirely with
> positive polarity.  Among other things, this helps spot RTM/BLD in the
> diagnostic message.
> 
> Drop the unconditional v->arch.dr6 adjustment.  pv_inject_event() performs the
> adjustment in the common case, but retain the prior behaviour if a debugger is
> attached.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> ---
> CC: Jan Beulich <JBeulich@xxxxxxxx>
> CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> CC: Wei Liu <wl@xxxxxxx>
> CC: Jinoh Kang <jinoh.kang.kr@xxxxxxxxx>
> 
> v2:
>  * Split pieces out into an earlier patch.
>  * Extend do_debug() to use pending_dbg entirely, rather than have the same
>    variable used with different polarity at different times.
> ---
>  xen/arch/x86/pv/emul-priv-op.c  |  5 +----
>  xen/arch/x86/pv/emulate.c       |  9 +++++++--
>  xen/arch/x86/pv/ro-page-fault.c |  4 ++--
>  xen/arch/x86/pv/traps.c         | 17 +++++++++++++----
>  xen/arch/x86/traps.c            | 23 +++++++++++------------
>  5 files changed, 34 insertions(+), 24 deletions(-)
> 
> diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
> index 2f73ed0682e1..fe2011f28e34 100644
> --- a/xen/arch/x86/pv/emul-priv-op.c
> +++ b/xen/arch/x86/pv/emul-priv-op.c
> @@ -1364,10 +1364,7 @@ int pv_emulate_privileged_op(struct cpu_user_regs 
> *regs)
>          ASSERT(!curr->arch.pv.trap_bounce.flags);
>  
>          if ( ctxt.ctxt.retire.pending_dbg )
> -        {
> -            curr->arch.dr6 |= ctxt.ctxt.retire.pending_dbg | 
> DR_STATUS_RESERVED_ONE;
> -            pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
> -        }
> +            pv_inject_DB(ctxt.ctxt.retire.pending_dbg);
>  
>          /* fall through */
>      case X86EMUL_RETRY:
> diff --git a/xen/arch/x86/pv/emulate.c b/xen/arch/x86/pv/emulate.c
> index e7a1c0a2cc4f..29425a0a00ec 100644
> --- a/xen/arch/x86/pv/emulate.c
> +++ b/xen/arch/x86/pv/emulate.c
> @@ -71,10 +71,15 @@ void pv_emul_instruction_done(struct cpu_user_regs *regs, 
> unsigned long rip)
>  {
>      regs->rip = rip;
>      regs->eflags &= ~X86_EFLAGS_RF;
> +
>      if ( regs->eflags & X86_EFLAGS_TF )
>      {
> -        current->arch.dr6 |= DR_STEP | DR_STATUS_RESERVED_ONE;
> -        pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
> +        /*
> +         * TODO, this should generally use TF from the start of the
> +         * instruction.  It's only a latent bug for now, as this path isn't
> +         * used for any instruction which modifies eflags.
> +         */
> +        pv_inject_DB(X86_DR6_BS);
>      }
>  }
>  
> diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c
> index cad28ef928ad..f6bb33556e72 100644
> --- a/xen/arch/x86/pv/ro-page-fault.c
> +++ b/xen/arch/x86/pv/ro-page-fault.c
> @@ -389,8 +389,8 @@ int pv_ro_page_fault(unsigned long addr, struct 
> cpu_user_regs *regs)
>  
>          /* Fallthrough */
>      case X86EMUL_OKAY:
> -        if ( ctxt.retire.singlestep )
> -            pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
> +        if ( ctxt.retire.pending_dbg )
> +            pv_inject_DB(ctxt.retire.pending_dbg);
>  
>          /* Fallthrough */
>      case X86EMUL_RETRY:
> diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c
> index 74f333da7e1c..553b04bca956 100644
> --- a/xen/arch/x86/pv/traps.c
> +++ b/xen/arch/x86/pv/traps.c
> @@ -13,6 +13,7 @@
>  #include <xen/softirq.h>
>  
>  #include <asm/pv/trace.h>
> +#include <asm/debugreg.h>
>  #include <asm/shared.h>
>  #include <asm/traps.h>
>  #include <irq_vectors.h>
> @@ -50,9 +51,9 @@ void pv_inject_event(const struct x86_event *event)
>      tb->cs    = ti->cs;
>      tb->eip   = ti->address;
>  
> -    if ( event->type == X86_EVENTTYPE_HW_EXCEPTION &&
> -         vector == X86_EXC_PF )
> +    switch ( vector | -(event->type == X86_EVENTTYPE_SW_INTERRUPT) )
>      {
> +    case X86_EXC_PF:
>          curr->arch.pv.ctrlreg[2] = event->cr2;
>          arch_set_cr2(curr, event->cr2);
>  
> @@ -62,9 +63,17 @@ void pv_inject_event(const struct x86_event *event)
>              error_code |= PFEC_user_mode;
>  
>          trace_pv_page_fault(event->cr2, error_code);
> -    }
> -    else
> +        break;
> +
> +    case X86_EXC_DB:
> +        curr->arch.dr6 = x86_merge_dr6(curr->domain->arch.cpu_policy,
> +                                       curr->arch.dr6, event->pending_dbg);
> +        /* Fallthrough */
> +
> +    default:
>          trace_pv_trap(vector, regs->rip, use_error_code, error_code);
> +        break;
> +    }
>  
>      if ( use_error_code )
>      {
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index dead728ce329..447edc827b3a 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -1887,11 +1887,11 @@ void do_device_not_available(struct cpu_user_regs 
> *regs)
>  /* SAF-1-safe */
>  void do_debug(struct cpu_user_regs *regs)
>  {
> -    unsigned long dr6;
> +    unsigned long pending_dbg;
>      struct vcpu *v = current;
>  
> -    /* Stash dr6 as early as possible. */
> -    dr6 = read_debugreg(6);
> +    /* Stash dr6 as early as possible, operating with positive polarity. */
> +    pending_dbg = read_debugreg(6) ^ X86_DR6_DEFAULT;

We don't reset DR6 after reading it, and there is no guarantee that the PV guest
will reset it either, so it doesn't match PENDING_DBG exactly IIRC.

On the other hand, nothing will probably care about its double-accumulating
quirk except perhaps monitor users.

Do we want to document that, shadow DR6 for PV (which seems a little overkill
if we don't trap DR6 access from PV already), or just live with that?

>  
>      /*
>       * At the time of writing (March 2018), on the subject of %dr6:
> @@ -1963,13 +1963,13 @@ void do_debug(struct cpu_user_regs *regs)
>           * If however we do, safety measures need to be enacted.  Use a big
>           * hammer and clear all debug settings.
>           */
> -        if ( dr6 & (DR_TRAP3 | DR_TRAP2 | DR_TRAP1 | DR_TRAP0) )
> +        if ( pending_dbg & X86_DR6_BP_MASK )
>          {
>              unsigned int bp, dr7 = read_debugreg(7);
>  
>              for ( bp = 0; bp < 4; ++bp )
>              {
> -                if ( (dr6 & (1u << bp)) && /* Breakpoint triggered? */
> +                if ( (pending_dbg & (1u << bp)) && /* Breakpoint triggered? 
> */
>                       (dr7 & (3u << (bp * DR_ENABLE_SIZE))) && /* Enabled? */
>                       ((dr7 & (3u << ((bp * DR_CONTROL_SIZE) + /* Insn? */
>                                       DR_CONTROL_SHIFT))) == DR_RW_EXECUTE) )
> @@ -1990,24 +1990,23 @@ void do_debug(struct cpu_user_regs *regs)
>           * so ensure the message is ratelimited.
>           */
>          gprintk(XENLOG_WARNING,
> -                "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, dr6 
> %lx\n",
> +                "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, 
> pending_dbg %lx\n",
>                  regs->cs, _p(regs->rip), _p(regs->rip),
> -                regs->ss, _p(regs->rsp), dr6);
> +                regs->ss, _p(regs->rsp), pending_dbg);
>  
>          return;
>      }
>  
> -    /* Save debug status register where guest OS can peek at it */
> -    v->arch.dr6 |= (dr6 & ~X86_DR6_DEFAULT);
> -    v->arch.dr6 &= (dr6 | ~X86_DR6_DEFAULT);
> -
>      if ( guest_kernel_mode(v, regs) && v->domain->debugger_attached )
>      {
> +        /* Save debug status register where gdbsx can peek at it */
> +        v->arch.dr6 = x86_merge_dr6(v->domain->arch.cpu_policy,
> +                                    v->arch.dr6, pending_dbg);
>          domain_pause_for_debugger();
>          return;
>      }
>  
> -    pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC);
> +    pv_inject_DB(pending_dbg);
>  }
>  
>  /* SAF-1-safe */

-- 
Sincerely,
Jinoh Kang




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.