Xen project Mailing List

Re: [PATCH 7/7] x86/pv: Rewrite %dr6 handling

To: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>

From: Jinoh Kang <jinoh.kang.kr@xxxxxxxxx>

Date: Sat, 16 Sep 2023 16:50:39 +0900

Cc: Jan Beulich <JBeulich@xxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>

Delivery-date: Sat, 16 Sep 2023 07:50:55 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 9/16/23 05:36, Andrew Cooper wrote: > All #DB exceptions result in an update of %dr6, but this isn't handled > properly by Xen for any guest type. > > Remove all ad-hoc dr6 handling, leaving it to pv_inject_event() in most cases > and using the new x86_merge_dr6() helper. > > In do_debug(), swap the dr6 to pending_dbg in order to operate entirely with > positive polarity. Among other things, this helps spot RTM/BLD in the > diagnostic message. > > Drop the unconditional v->arch.dr6 adjustment. pv_inject_event() performs the > adjustment in the common case, but retain the prior behaviour if a debugger is > attached. > > Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> > --- > CC: Jan Beulich <JBeulich@xxxxxxxx> > CC: Roger Pau Monné <roger.pau@xxxxxxxxxx> > CC: Wei Liu <wl@xxxxxxx> > CC: Jinoh Kang <jinoh.kang.kr@xxxxxxxxx> > > v2: > * Split pieces out into an earlier patch. > * Extend do_debug() to use pending_dbg entirely, rather than have the same > variable used with different polarity at different times. > --- > xen/arch/x86/pv/emul-priv-op.c | 5 +---- > xen/arch/x86/pv/emulate.c | 9 +++++++-- > xen/arch/x86/pv/ro-page-fault.c | 4 ++-- > xen/arch/x86/pv/traps.c | 17 +++++++++++++---- > xen/arch/x86/traps.c | 23 +++++++++++------------ > 5 files changed, 34 insertions(+), 24 deletions(-) > > diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c > index 2f73ed0682e1..fe2011f28e34 100644 > --- a/xen/arch/x86/pv/emul-priv-op.c > +++ b/xen/arch/x86/pv/emul-priv-op.c > @@ -1364,10 +1364,7 @@ int pv_emulate_privileged_op(struct cpu_user_regs > *regs) > ASSERT(!curr->arch.pv.trap_bounce.flags); > > if ( ctxt.ctxt.retire.pending_dbg ) > - { > - curr->arch.dr6 |= ctxt.ctxt.retire.pending_dbg | > DR_STATUS_RESERVED_ONE; > - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); > - } > + pv_inject_DB(ctxt.ctxt.retire.pending_dbg); > > /* fall through */ > case X86EMUL_RETRY: > diff --git a/xen/arch/x86/pv/emulate.c b/xen/arch/x86/pv/emulate.c > index e7a1c0a2cc4f..29425a0a00ec 100644 > --- a/xen/arch/x86/pv/emulate.c > +++ b/xen/arch/x86/pv/emulate.c > @@ -71,10 +71,15 @@ void pv_emul_instruction_done(struct cpu_user_regs *regs, > unsigned long rip) > { > regs->rip = rip; > regs->eflags &= ~X86_EFLAGS_RF; > + > if ( regs->eflags & X86_EFLAGS_TF ) > { > - current->arch.dr6 |= DR_STEP | DR_STATUS_RESERVED_ONE; > - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); > + /* > + * TODO, this should generally use TF from the start of the > + * instruction. It's only a latent bug for now, as this path isn't > + * used for any instruction which modifies eflags. > + */ > + pv_inject_DB(X86_DR6_BS); > } > } > > diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c > index cad28ef928ad..f6bb33556e72 100644 > --- a/xen/arch/x86/pv/ro-page-fault.c > +++ b/xen/arch/x86/pv/ro-page-fault.c > @@ -389,8 +389,8 @@ int pv_ro_page_fault(unsigned long addr, struct > cpu_user_regs *regs) > > /* Fallthrough */ > case X86EMUL_OKAY: > - if ( ctxt.retire.singlestep ) > - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); > + if ( ctxt.retire.pending_dbg ) > + pv_inject_DB(ctxt.retire.pending_dbg); > > /* Fallthrough */ > case X86EMUL_RETRY: > diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c > index 74f333da7e1c..553b04bca956 100644 > --- a/xen/arch/x86/pv/traps.c > +++ b/xen/arch/x86/pv/traps.c > @@ -13,6 +13,7 @@ > #include <xen/softirq.h> > > #include <asm/pv/trace.h> > +#include <asm/debugreg.h> > #include <asm/shared.h> > #include <asm/traps.h> > #include <irq_vectors.h> > @@ -50,9 +51,9 @@ void pv_inject_event(const struct x86_event *event) > tb->cs = ti->cs; > tb->eip = ti->address; > > - if ( event->type == X86_EVENTTYPE_HW_EXCEPTION && > - vector == X86_EXC_PF ) > + switch ( vector | -(event->type == X86_EVENTTYPE_SW_INTERRUPT) ) > { > + case X86_EXC_PF: > curr->arch.pv.ctrlreg[2] = event->cr2; > arch_set_cr2(curr, event->cr2); > > @@ -62,9 +63,17 @@ void pv_inject_event(const struct x86_event *event) > error_code |= PFEC_user_mode; > > trace_pv_page_fault(event->cr2, error_code); > - } > - else > + break; > + > + case X86_EXC_DB: > + curr->arch.dr6 = x86_merge_dr6(curr->domain->arch.cpu_policy, > + curr->arch.dr6, event->pending_dbg); > + /* Fallthrough */ > + > + default: > trace_pv_trap(vector, regs->rip, use_error_code, error_code); > + break; > + } > > if ( use_error_code ) > { > diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c > index dead728ce329..447edc827b3a 100644 > --- a/xen/arch/x86/traps.c > +++ b/xen/arch/x86/traps.c > @@ -1887,11 +1887,11 @@ void do_device_not_available(struct cpu_user_regs > *regs) > /* SAF-1-safe */ > void do_debug(struct cpu_user_regs *regs) > { > - unsigned long dr6; > + unsigned long pending_dbg; > struct vcpu *v = current; > > - /* Stash dr6 as early as possible. */ > - dr6 = read_debugreg(6); > + /* Stash dr6 as early as possible, operating with positive polarity. */ > + pending_dbg = read_debugreg(6) ^ X86_DR6_DEFAULT; We don't reset DR6 after reading it, and there is no guarantee that the PV guest will reset it either, so it doesn't match PENDING_DBG exactly IIRC. On the other hand, nothing will probably care about its double-accumulating quirk except perhaps monitor users. Do we want to document that, shadow DR6 for PV (which seems a little overkill if we don't trap DR6 access from PV already), or just live with that? > > /* > * At the time of writing (March 2018), on the subject of %dr6: > @@ -1963,13 +1963,13 @@ void do_debug(struct cpu_user_regs *regs) > * If however we do, safety measures need to be enacted. Use a big > * hammer and clear all debug settings. > */ > - if ( dr6 & (DR_TRAP3 | DR_TRAP2 | DR_TRAP1 | DR_TRAP0) ) > + if ( pending_dbg & X86_DR6_BP_MASK ) > { > unsigned int bp, dr7 = read_debugreg(7); > > for ( bp = 0; bp < 4; ++bp ) > { > - if ( (dr6 & (1u << bp)) && /* Breakpoint triggered? */ > + if ( (pending_dbg & (1u << bp)) && /* Breakpoint triggered? > */ > (dr7 & (3u << (bp * DR_ENABLE_SIZE))) && /* Enabled? */ > ((dr7 & (3u << ((bp * DR_CONTROL_SIZE) + /* Insn? */ > DR_CONTROL_SHIFT))) == DR_RW_EXECUTE) ) > @@ -1990,24 +1990,23 @@ void do_debug(struct cpu_user_regs *regs) > * so ensure the message is ratelimited. > */ > gprintk(XENLOG_WARNING, > - "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, dr6 > %lx\n", > + "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, > pending_dbg %lx\n", > regs->cs, _p(regs->rip), _p(regs->rip), > - regs->ss, _p(regs->rsp), dr6); > + regs->ss, _p(regs->rsp), pending_dbg); > > return; > } > > - /* Save debug status register where guest OS can peek at it */ > - v->arch.dr6 |= (dr6 & ~X86_DR6_DEFAULT); > - v->arch.dr6 &= (dr6 | ~X86_DR6_DEFAULT); > - > if ( guest_kernel_mode(v, regs) && v->domain->debugger_attached ) > { > + /* Save debug status register where gdbsx can peek at it */ > + v->arch.dr6 = x86_merge_dr6(v->domain->arch.cpu_policy, > + v->arch.dr6, pending_dbg); > domain_pause_for_debugger(); > return; > } > > - pv_inject_hw_exception(X86_EXC_DB, X86_EVENT_NO_EC); > + pv_inject_DB(pending_dbg); > } > > /* SAF-1-safe */ -- Sincerely, Jinoh Kang

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.