[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 4/5] xen/wait: Use relative stack adjustments


  • To: Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
  • Date: Mon, 18 Jul 2022 08:18:24 +0100
  • Authentication-results: esa2.hc3370-68.iphmx.com; dkim=none (message not signed) header.i=none
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Jan Beulich <JBeulich@xxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>
  • Delivery-date: Mon, 18 Jul 2022 07:18:57 +0000
  • Ironport-data: A9a23:zjBJsa2tB0xEkJQofPbDix16xS7N8HFdZTh5A9k8jSZ0cfkRliZWkhF6PiwklHlZu+rW2YndT/em3FGgmSslSUnMXd4QO/PJHhVrmHATwQEHUHVE2NFApiHviEdWNnjUaBy65wBs81Eismu0A9pSy5YFciCEkjMPwJ7ek37nVVutO35b33UT7JRP5F/H3jn+KVRCr5/xrZZxp+VXVcLF2F/lFq3KSUyb0H8LcL8SdO1n3lp5gr09/uS/VfPfvYA9iS3hT7yfGtBZyU31dpkBNrx7mkC4lYI8xl9BbuzQl74WYbXOwNE/LrirTkv/hEMReKhLiR+VXP5Z+PtEzTU6wWRo/NhkDABzB3T4+aL8uBx9xnwALmCCVJBPKATbm+55KHZrd4jNRVensDLN5KJ1Y6xEWpQzxsWc5HfteSE8KUCIRl/fIwRaGMAsL/A3WrdvC88c/l0lIPjXh23vQYIm3LxsIRS7q8w01GL6FhGa6/6SXuQGnlFXp4T8Ji5Pt0qELmP4LT09DwL1nc7phsLmE9/PePhpT6Ud1kUBu09lXuDlpZ+xCmm8rIQEhZ7j4u+TayUflwoG+NwI4z/9IKnKb0cFF4+9zymJqLwW1Wnrc//O+P9cjXfFrPkKSUqQm2EggxRtnS4E/6WVUlJrarcs5WB/VN1P/ZAXBxdJHGF3ZCUNGeaP2BXb2zaplsufyb+QxJFWdAd2AKlTk6hmAtFfGCY3BqecuUIAeOODnFRSRD5El1bZkVTeABXJEPLNd6CmcKiocVKHTNs1Rtv+ib0aow1Vd0Yv32hmzbpopmL+T6n4Aa8AVrHNePMiFAEcM32wAr6S6dapPO3yV3uMjFv6Ys88F8D7hZo936QdpG89j2JPVrhuBki62RiKdIFqrKlnavZ/bY657+c1P0iwmUgF9QIKc4jf6twjk8fpsaxq+Giw3iXdS68a5dxfndxD3hcJfd/wNlkKM5LF7u0ozPQ7Ez77Jl+YmJwAhQI1EkXMrCr08HlFkRWHYITsYZaSi2ElyS0RC9cKwnNjQEmQ2HMSjMrSInTqrdQjf+riPAUd+l8mNMHVbXcIXHDycDBQse6Ge8bg9VxNOzDtekFMDffAlheffciXvURsKXmLqcbiYkn4WewPfWmZfqzlVyBvIrUWimBC2wBsY1lCh8k2L2jiJKs6Al4i018ucgPgxFmvAtc0uyjgJ5STKYWPAmnX27nXbh9s9dWyo2btBF4JolKewyKnxDWTKc7IbTZXOLcM8xw4dLBHJ4U5irDGZSfpCErn3f4IU4cmTxMgwifYs04mdMMGMQTLyr/6MdJyQEA2j5uuxVn40FcGIB0CuzDrO45+s52Ra633kXyOeUecITqzRciQrgQ1W0OoyawiQerdoeH0bJwZSDZH28HIVc7UaZGK4buvxPDiIOhldnMURbQj8s2ZDSXG5OLr9fEWX1nI36xi9QnDc7DZH8/GVR99AO9doyOuS1XDLNLwsiuh0pNr8n2WYiScvxMph9l9L8q0O24BqBKIMlalyS4MuQZ21pcl+7pZOwDGzoNGu6mw2XodioYsE52k+95+n5vAiQ8n40yjzVLk6zardi15ONIP6BmCHVyscY0b5B/8Gj89HB7vxzzp1sGb1qGR/EaOwbAa6TGikImFi9Dn2iV+vWsizV5GHj0BIc08AUTtwB5HHVScAGy2hNmS5u8QCzMbSMOXQiTGa3P9x3rtOHMSePyfCBDtS7A49pq2qbVqjvTqlOyhDwZEpz693zP4aekwr9NbEctmYzP5o8ssw5YIW8yqTqsDy3lo5aJEjsNqcdoc5qJl/IOkFXuj3N6ka7Z49rFDWBIEJkyh00D/Qd+h99JpNijBx/QGjyb544knApj9Ejal+CU0Ywh20BqUNi7eKaUcqHJA/ofDDxeKFpJk5EL4wYvctZslSfJJ4R3JyAc4c5s1TS+3GixP7gZxI/icX+Na5bY8xIPrhWMW7hC
8A72hBrVvHbt79AuNDhw26dB10X/ckN7JR8mblrdqMfrVtZbolwZFcU7nqfLSK8j8rbrOLzIWpUeHZy91oDwz4nfAE3JecsOKzTdqu1C9AkQCD+8XC/mUfPBOhE2AmA0IJ4GFX289K03lJPx0aR3997VDbHIasgdyPe3+pkEwtUXw26Jor0+X4bXD7xZ28BjVchK/lyrWU/ujyFtTBZ3e7NWsnlQQTWIeGwUrodpuJrSGaaLo9GGoxT19Nv3uIp4ox9nRMgHxAOQTJp2OZ63K4KtgZo69spRrkTbnOQSfVRrxN12Er5y4z4b/E3BsOIHhLAFB7kdpPCsz7iZfLsgYuVEXz7UQCGOFCSEIzBob9f2riyi5KiDJrg9jSpp9yxa0SV7oOaZZub+vcX+RGabo8BPqd4Iou1q9SsQO0znSDXzPbobCCTDCaL+MN2eEXbefTVQzeXq+3/1g6+OE/b/UgQWHkjK2JGEMaTjKqH2ozj4SInCJAUhMPApaPU53NqC5Vgf7RlsOJJ9XxseJneJi6YTkHGtDva8UAa47lvKzc2ZMLHNHfVPYG6Q5gg/Rrd41LgRGvEI5NmNuy3YQwZqtSB3mgzFhIoshSkkfmg6fvAGkhao89CkGoTqbJafEAy0842u7U8M7jxVpIVtopct0AuiWcTNdhKh0d0Lz8pm1so0Z8FrzDCepLYKVW6cdf3FZ5JzO7gB7Wdgwrbip3We7+UJn
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

The waitqueue's esp field is overloaded.  It serves both as an indication that
the waitqueue is in use, and as a direction to check_wakeup_from_wait() as to
where to adjust the stack pointer to, but using an absolute pointer comes with
the cost of requiring the vCPU to wake up on the same pCPU it went to sleep on.

Instead, have the waitqueue just keep track of how much data is on wqv->stack.
This is no practical change in __prepare_to_wait() (it already calculated the
delta), but the result is now split out of the (also overloaded) %rsi output
parameter by using a separate register.

check_wakeup_from_wait() has a bit more work to do.  It now needs to calculate
the adjustment to %rsp rather than having the new %rsp provided as a
parameter.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
CC: Wei Liu <wl@xxxxxxx>
---
 xen/common/wait.c | 44 ++++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/xen/common/wait.c b/xen/common/wait.c
index 4bc030d1a09d..4f1daf650bc4 100644
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -32,8 +32,8 @@ struct waitqueue_vcpu {
      * Xen/x86 does not have per-vcpu hypervisor stacks. So we must save the
      * hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
      */
-    void *esp;
     char *stack;
+    unsigned int used;
 #endif
 };
 
@@ -121,11 +121,11 @@ void wake_up_all(struct waitqueue_head *wq)
 
 static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
 {
-    struct cpu_info *cpu_info = get_cpu_info();
     struct vcpu *curr = current;
     unsigned long dummy;
+    unsigned int used;
 
-    ASSERT(wqv->esp == 0);
+    ASSERT(wqv->used == 0);
 
     /* Save current VCPU affinity; force wakeup on *this* CPU only. */
     if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) 
)
@@ -154,24 +154,25 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
         "push %%rbx; push %%rbp; push %%r12;"
         "push %%r13; push %%r14; push %%r15;"
 
-        "sub %%esp,%%ecx;"
+        "sub %%esp, %%ecx;" /* ecx = delta to cpu_info */
         "cmp %[sz], %%ecx;"
         "ja .L_skip;"       /* Bail if >4k */
-        "mov %%rsp,%%rsi;"
+
+        "mov %%ecx, %%eax;"
+        "mov %%rsp, %%rsi;" /* Copy from the stack, into wqv->stack */
 
         /* check_wakeup_from_wait() longjmp()'s to this point. */
         ".L_wq_resume: rep movsb;"
-        "mov %%rsp,%%rsi;"
 
         ".L_skip:"
         "pop %%r15; pop %%r14; pop %%r13;"
         "pop %%r12; pop %%rbp; pop %%rbx;"
-        : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
-        : "0" (0), "1" (cpu_info), "2" (wqv->stack),
+        : "=a" (used), "=D" (dummy),     "=c" (dummy),         "=&S" (dummy)
+        : "a" (0),     "D" (wqv->stack), "c" (get_cpu_info()),
           [sz] "i" (PAGE_SIZE)
-        : "memory", "rax", "rdx", "r8", "r9", "r10", "r11" );
+        : "memory", "rdx", "r8", "r9", "r10", "r11" );
 
-    if ( unlikely(wqv->esp == 0) )
+    if ( unlikely(used > PAGE_SIZE) )
     {
         gdprintk(XENLOG_ERR, "Stack too large in %s\n", __func__);
         domain_crash(curr->domain);
@@ -179,11 +180,13 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
         for ( ; ; )
             do_softirq();
     }
+
+    wqv->used = used;
 }
 
 static void __finish_wait(struct waitqueue_vcpu *wqv)
 {
-    wqv->esp = NULL;
+    wqv->used = 0;
     vcpu_temporary_affinity(current, NR_CPUS, VCPU_AFFINITY_WAIT);
 }
 
@@ -191,10 +194,11 @@ void check_wakeup_from_wait(void)
 {
     struct vcpu *curr = current;
     struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
+    unsigned long tmp;
 
     ASSERT(list_empty(&wqv->list));
 
-    if ( likely(wqv->esp == NULL) )
+    if ( likely(!wqv->used) )
         return;
 
     /* Check if we are still pinned. */
@@ -220,14 +224,22 @@ void check_wakeup_from_wait(void)
      * the rep movs in __prepare_to_wait(), it copies from wqv->stack over the
      * active stack.
      *
+     * We are also bound by __prepare_to_wait()'s output constraints, so %eax
+     * needs to be wqv->used.
+     *
      * All other GPRs are available for use; they're either restored from
      * wqv->stack or explicitly clobbered.
      */
-    asm volatile ( "mov %%rdi, %%rsp;"
+    asm volatile ( "sub %%esp, %k[var];" /* var = delta to cpu_info */
+                   "neg %k[var];"
+                   "add %%ecx, %k[var];" /* var = -delta + wqv->used */
+
+                   "sub %[var], %%rsp;"  /* Adjust %rsp down to make room */
+                   "mov %%rsp, %%rdi;"   /* Copy from wqv->stack, into the 
stack */
                    "jmp .L_wq_resume;"
-                   :
-                   : "S" (wqv->stack), "D" (wqv->esp),
-                     "c" ((char *)get_cpu_info() - (char *)wqv->esp)
+                   : "=D" (tmp), [var] "=&r" (tmp)
+                   : "S" (wqv->stack), "c" (wqv->used), "a" (wqv->used),
+                     "[var]" (get_cpu_info())
                    : "memory" );
     unreachable();
 }
-- 
2.11.0




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.