[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 4/6] x86/trace: Reduce stack usage from HVMTRACE_ND()


  • To: Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
  • Date: Fri, 17 Sep 2021 09:45:57 +0100
  • Authentication-results: esa4.hc3370-68.iphmx.com; dkim=none (message not signed) header.i=none
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, George Dunlap <George.Dunlap@xxxxxxxxxxxxx>, Ian Jackson <iwj@xxxxxxxxxxxxxx>, Jan Beulich <JBeulich@xxxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Julien Grall <julien@xxxxxxx>, Dario Faggioli <dfaggioli@xxxxxxxx>
  • Delivery-date: Fri, 17 Sep 2021 08:46:26 +0000
  • Ironport-data: A9a23:d1SstK8cXCf95j2PDyx5DrUDfHmTJUtcMsCJ2f8bNWPcYEJGY0x3z jFOWWGHOP2OZjb3e9AjbIvg/UtSupTUx9YwGVdupC88E34SpcT7XtnIdU2Y0wF+jyHgoOCLy +1EN7Es+ehtFie0Si9AttENlFEkvU2ybuOU5NXsZ2YhGGeIdA970Ug6w79g3dYx6TSEK1jlV e3a8pW31GCNg1aYAkpMg05UgEoy1BhakGpwUm0WPZinjneH/5UmJMt3yZWKB2n5WuFp8tuSH I4v+l0bElTxpH/BAvv9+lryn9ZjrrT6ZWBigVIOM0Sub4QrSoXfHc/XOdJFAXq7hQllkPgrm NRRtJeCUz0uBfz9p+8iCz5ZPBlXaPguFL/veRBTsOSWxkzCNXDt3+9vHAc9OohwFuRfWD8Us 6ZCcXZUM07F17neLLGTE4GAguwKKsXxMZxZkXZn1TzDVt4tQIzZQrWM7thdtNs1rp0SQquGN ppGAdZpRB/CbTwRAwwZMbsZluaMqWPeVDcJo2vA8MLb5ECMlVcsgdABKuH9eMGORMhTtlaVo CTB5WuRKjMwOcGbyDGF2mmxneKJliT+MKoeGrS3+f9thFy73XEIBVsdUl7Tiea9ol6zXZRYM UN80jojq+0++VKmSvH5XgakuziUsxgEQd1SHuYmrgaXxcL84hmbLngJSCZbb94rv9NwQiYlv neShM/gDzFrtLyTSFqe+62SoDf0PjIaRVLufgddE1FDuYO65thu0FSfFb6PDZJZkPWvSTDsw ju4thI5vOQissU0j4K60mju1mfESofycuIl2unGdjv7tVokNdH6OdXABUvztqkbfd3AJrWVl D1dwZHPsrpWZX2YvHHVGI0w8KeVC+Fp2dE2qWVmGYUovx+p8mSqFWy7yGAjfBo1WirolDmAX aMyhe+zzMQIVJdJRfUuC25UNyjM5fO4fTgCfqqPBueimrArKGe6ENhGPCZ8JVwBdXTAd4lkY f93lu73Vh4n5VlPlmLqF4/xL5dynn1WKZzvqWDTkE38jOv2iI+9YrYZKlqeBt3VH4vd+169z jqrDOPTk083eLSnOkH/qNdPRXhXfSlTLc2n8KR/K7/cSjeK7Ul8Upc9N5t6INc790mU/8+Vl kyAtrhwkwam3iybdlzRMxiOqtrHBP5CkJ7yBgR0VX7A5pTpSd/HAH43e8RlcL852vZkyPIoH fAJd9/ZWqZESyjd+iRbZp749dQweBOujAOIHiykfDlgIMIwG12XoofpLln16S0DLiurrs9i8 beu4RzWHMgYTAN4AceINP/2lwGtvWIQkf5ZVlfTJoUBY13l9YVncnSjjvI+L8wWBw/Ewz+Wi 1SfDRsC/LGfqI4p6tjZw6uDqt7xQed5G0NbGUjd7Kq3anaGrjbyn9cYXb/RLz7HVW7y9KGzX slvzqnxYK8dgVJHk4tgCLI3n6gw0MTi+u1BxQN+EXSVM1nyUuF8ImOL1NVkv7FWwuMLohO/X 0+C94UIObiNP8+5QlcdKBB8M7aG3PAQ3DLT8e40MAPx4youpOiLVkBbPh+tji1BLeQqbNN5k Ll54MNGuRaijhcKM8qdinEG/muBGXUMTqE7u8xIG4TskAcqlglPbJG05vUaO31ThwGg6nUXH wI=
  • Ironport-hdrordr: A9a23:Dqtkna1wdC6jgNq1E38FPAqjBLAkLtp133Aq2lEZdPRUGvb3qy nOpoVj6faaslYssR0b9exofZPwJE80lqQFh7X5X43SPzUO0VHAROoJgLcKgQeQfxEWntQtsp uIGJIeNDSfNzdHZL7BkWuFL+o=
  • Ironport-sdr: JSm8SffdyxHUd9GVLPoEWHdUZetYvTIPnsvJw++KHnM9g7TtIDSLADXykFj5d7TLYoGcH2iEVY WjjDFN0dKbyGZkmP1n30c94zl5/niS2MpbxpVhETM5l1iWfoCYpAzSfkg03znMt+HQRrcUxVLO S+EBalbPRG1YJmf7lZ6ASho8Mwhc9iiT+KCvCU/2SQDXQkQTEM76Yx/k1yIoiwBfQoy4ki0ezK qF4aH0SMs6FT4NwSN8yIiI4OJ1vROdfUTGJkJ+lNCVp327z8beqmgU+y7XFXN1oRx2EhMO3Vac net3eBX44u6KPgYql4hj69hI
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

It is pointless to write all 6 entries and only consume the useful subset.
bloat-o-meter shows quite how obscene the overhead is in vmx_vmexit_handler(),
weighing in at 11% of the function arranging unread zeroes on the stack, and
8% for svm_vmexit_handler().

  add/remove: 0/0 grow/shrink: 0/20 up/down: 0/-1867 (-1867)
  Function                                     old     new   delta
  hvm_msr_write_intercept                     1049    1033     -16
  vmx_enable_intr_window                       238     214     -24
  svm_enable_intr_window                       337     313     -24
  hvmemul_write_xcr                            115      91     -24
  hvmemul_write_cr                             350     326     -24
  hvmemul_read_xcr                             115      91     -24
  hvmemul_read_cr                              146     122     -24
  hvm_mov_to_cr                                438     414     -24
  hvm_mov_from_cr                              253     229     -24
  vmx_intr_assist                             1150    1118     -32
  svm_intr_assist                              459     427     -32
  hvm_rdtsc_intercept                          138     106     -32
  hvm_msr_read_intercept                       898     866     -32
  vmx_vmenter_helper                          1142    1094     -48
  vmx_inject_event                             813     765     -48
  svm_vmenter_helper                           238     190     -48
  hvm_hlt                                      197     146     -51
  svm_inject_event                            1678    1614     -64
  svm_vmexit_handler                          5880    5416    -464
  vmx_vmexit_handler                          7281    6473    -808
  Total: Before=3644184, After=3642317, chg -0.05%

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: George Dunlap <George.Dunlap@xxxxxxxxxxxxx>
CC: Ian Jackson <iwj@xxxxxxxxxxxxxx>
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Stefano Stabellini <sstabellini@xxxxxxxxxx>
CC: Wei Liu <wl@xxxxxxx>
CC: Julien Grall <julien@xxxxxxx>
CC: Dario Faggioli <dfaggioli@xxxxxxxx>

Normally I wouldn't recommend patches like this for backport, but
{vmx,svm}_vmexit_handler() are fastpaths and this is a *lot* of I-cache lines
dropped...
---
 xen/arch/x86/hvm/svm/svm.c      |  8 +++-----
 xen/arch/x86/hvm/vmx/vmx.c      |  9 ++++-----
 xen/include/asm-x86/hvm/trace.h | 28 ++++++++++------------------
 3 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index afb1ccb342c2..f0e10dec046e 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1052,7 +1052,7 @@ void svm_vmenter_helper(const struct cpu_user_regs *regs)
     if ( unlikely(tb_init_done) )
         HVMTRACE_ND(VMENTRY,
                     nestedhvm_vcpu_in_guestmode(curr) ? TRC_HVM_NESTEDFLAG : 0,
-                    1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
+                    1/*cycles*/);
 
     svm_sync_vmcb(curr, vmcb_needs_vmsave);
 
@@ -2565,12 +2565,10 @@ void svm_vmexit_handler(struct cpu_user_regs *regs)
 
     if ( hvm_long_mode_active(v) )
         HVMTRACE_ND(VMEXIT64, vcpu_guestmode ? TRC_HVM_NESTEDFLAG : 0,
-                    1/*cycles*/, 3, exit_reason,
-                    regs->eip, regs->rip >> 32, 0, 0, 0);
+                    1/*cycles*/, exit_reason, TRC_PAR_LONG(regs->rip));
     else
         HVMTRACE_ND(VMEXIT, vcpu_guestmode ? TRC_HVM_NESTEDFLAG : 0,
-                    1/*cycles*/, 2, exit_reason,
-                    regs->eip, 0, 0, 0, 0);
+                    1/*cycles*/, exit_reason, regs->eip);
 
     if ( vcpu_guestmode )
     {
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b0a42d05f86a..d403e2d8060a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3864,11 +3864,10 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
     __vmread(VM_EXIT_REASON, &exit_reason);
 
     if ( hvm_long_mode_active(v) )
-        HVMTRACE_ND(VMEXIT64, 0, 1/*cycles*/, 3, exit_reason,
-                    regs->eip, regs->rip >> 32, 0, 0, 0);
+        HVMTRACE_ND(VMEXIT64, 0, 1/*cycles*/, exit_reason,
+                    TRC_PAR_LONG(regs->rip));
     else
-        HVMTRACE_ND(VMEXIT, 0, 1/*cycles*/, 2, exit_reason,
-                    regs->eip, 0, 0, 0, 0);
+        HVMTRACE_ND(VMEXIT, 0, 1/*cycles*/, exit_reason, regs->eip);
 
     perfc_incra(vmexits, exit_reason);
 
@@ -4645,7 +4644,7 @@ bool vmx_vmenter_helper(const struct cpu_user_regs *regs)
     if ( unlikely(curr->arch.hvm.vmx.lbr_flags & LBR_FIXUP_MASK) )
         lbr_fixup();
 
-    HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
+    HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/);
 
     __vmwrite(GUEST_RIP,    regs->rip);
     __vmwrite(GUEST_RSP,    regs->rsp);
diff --git a/xen/include/asm-x86/hvm/trace.h b/xen/include/asm-x86/hvm/trace.h
index 5cd459b855b7..2bbac45044ce 100644
--- a/xen/include/asm-x86/hvm/trace.h
+++ b/xen/include/asm-x86/hvm/trace.h
@@ -67,38 +67,30 @@
 #define TRACE_2_LONG_4D(_e, d1, d2, d3, d4, ...) \
     TRACE_6D(_e, d1, d2, d3, d4)
 
-#define HVMTRACE_ND(evt, modifier, cycles, count, d1, d2, d3, d4, d5, d6) \
+#define HVMTRACE_ND(evt, modifier, cycles, ...)                           \
     do {                                                                  \
         if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt )               \
         {                                                                 \
-            struct {                                                      \
-                u32 d[6];                                                 \
-            } _d;                                                         \
-            _d.d[0]=(d1);                                                 \
-            _d.d[1]=(d2);                                                 \
-            _d.d[2]=(d3);                                                 \
-            _d.d[3]=(d4);                                                 \
-            _d.d[4]=(d5);                                                 \
-            _d.d[5]=(d6);                                                 \
+            uint32_t _d[] = { __VA_ARGS__ };                              \
             __trace_var(TRC_HVM_ ## evt | (modifier), cycles,             \
-                        sizeof(*_d.d) * count, &_d);                      \
+                        sizeof(_d), _d);                                  \
         }                                                                 \
     } while(0)
 
 #define HVMTRACE_6D(evt, d1, d2, d3, d4, d5, d6)    \
-    HVMTRACE_ND(evt, 0, 0, 6, d1, d2, d3, d4, d5, d6)
+    HVMTRACE_ND(evt, 0, 0, d1, d2, d3, d4, d5, d6)
 #define HVMTRACE_5D(evt, d1, d2, d3, d4, d5)        \
-    HVMTRACE_ND(evt, 0, 0, 5, d1, d2, d3, d4, d5,  0)
+    HVMTRACE_ND(evt, 0, 0, d1, d2, d3, d4, d5)
 #define HVMTRACE_4D(evt, d1, d2, d3, d4)            \
-    HVMTRACE_ND(evt, 0, 0, 4, d1, d2, d3, d4,  0,  0)
+    HVMTRACE_ND(evt, 0, 0, d1, d2, d3, d4)
 #define HVMTRACE_3D(evt, d1, d2, d3)                \
-    HVMTRACE_ND(evt, 0, 0, 3, d1, d2, d3,  0,  0,  0)
+    HVMTRACE_ND(evt, 0, 0, d1, d2, d3)
 #define HVMTRACE_2D(evt, d1, d2)                    \
-    HVMTRACE_ND(evt, 0, 0, 2, d1, d2,  0,  0,  0,  0)
+    HVMTRACE_ND(evt, 0, 0, d1, d2)
 #define HVMTRACE_1D(evt, d1)                        \
-    HVMTRACE_ND(evt, 0, 0, 1, d1,  0,  0,  0,  0,  0)
+    HVMTRACE_ND(evt, 0, 0, d1)
 #define HVMTRACE_0D(evt)                            \
-    HVMTRACE_ND(evt, 0, 0, 0,  0,  0,  0,  0,  0,  0)
+    HVMTRACE_ND(evt, 0, 0)
 
 #define HVMTRACE_LONG_1D(evt, d1)                  \
                    HVMTRACE_2D(evt ## 64, (d1) & 0xFFFFFFFF, (d1) >> 32)
-- 
2.11.0




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.