[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] [IA64] Hand optimize for hyperprivop



# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1204920695 25200
# Node ID 43a87df9a11ebb013529571962472c364f7a4808
# Parent  6225df3ff209b3a985e3be8b737d49d948c53691
[IA64] Hand optimize for hyperprivop

This patch slightly optimizes hyperprivop emulation, especially hyper_rfi.
It makes the fstat system call about 2% faster on dom0.

Signed-off-by: Kouya Shimura <kouya@xxxxxxxxxxxxxx>
---
 xen/arch/ia64/xen/hyperprivop.S |  101 ++++++++++++++--------------------------
 1 files changed, 36 insertions(+), 65 deletions(-)

diff -r 6225df3ff209 -r 43a87df9a11e xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Fri Mar 07 13:09:47 2008 -0700
+++ b/xen/arch/ia64/xen/hyperprivop.S   Fri Mar 07 13:11:35 2008 -0700
@@ -67,19 +67,18 @@
 //     r19 == ipsr.cpl
 //     r31 == pr
 GLOBAL_ENTRY(fast_hyperprivop)
+       adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r18
        // HYPERPRIVOP_SSM_I?
        // assumes domain interrupts pending, so just do it
        cmp.eq p7,p6=HYPERPRIVOP_SSM_I,r17
 (p7)   br.sptk.many hyper_ssm_i;;
 
        // Check pending event indication
-       adds r20=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS, r18;;
-       ld8 r20=[r20]
+       ld8 r20=[r20]           // interrupt_mask_addr
        ;;
        ld1 r22=[r20],-1        // evtchn_upcall_mask
        ;;
        ld1 r20=[r20]           // evtchn_upcall_pending
-       ;;
 
        // HYPERPRIVOP_RFI?
        cmp.eq p7,p6=HYPERPRIVOP_RFI,r17
@@ -210,9 +209,8 @@ ENTRY(hyper_ssm_i)
        // give up for now if: ipsr.be==1, ipsr.pp==1
        mov r30=cr.ipsr
        mov r29=cr.iip;;
-       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
-(p7)   br.sptk.many dispatch_break_fault ;;
+       tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
+(p7)   br.spnt.many dispatch_break_fault ;;
 #ifdef FAST_HYPERPRIVOP_CNT
        movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SSM_I);;
        ld4 r21=[r20];;
@@ -220,8 +218,7 @@ ENTRY(hyper_ssm_i)
        st4 [r20]=r21;;
 #endif
        // set shared_mem iip to instruction after HYPER_SSM_I
-       extr.u r20=r30,IA64_PSR_RI_BIT,2 ;;
-       cmp.eq p6,p7=2,r20 ;;
+       tbit.nz p6,p7=r30,IA64_PSR_RI_BIT+1 ;;  // cr.ipsr.ri >= 2 ?
 (p6)   mov r20=0
 (p6)   adds r29=16,r29
 (p7)   adds r20=1,r20 ;;
@@ -346,8 +343,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
 (p6)   br.cond.spnt.few rp;;
        mov r17=cr.ipsr;;
        // slow path if: ipsr.pp==1
-       extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
-       cmp.ne p6,p0=r21,r0
+       tbit.nz p6,p0=r17,IA64_PSR_PP_BIT
 (p6)   br.cond.spnt.few rp;;
        // definitely have a domain tick
        mov cr.eoi=r0
@@ -537,8 +533,7 @@ GLOBAL_ENTRY(fast_break_reflect)
 #endif
        mov r30=cr.ipsr
        mov r29=cr.iip;;
-       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
+       tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
 (p7)   br.spnt.few dispatch_break_fault ;;
         movl r20=IA64_PSR_CPL ;; 
         and r22=r20,r30 ;;
@@ -722,8 +717,7 @@ GLOBAL_ENTRY(fast_access_reflect)
 #endif
        mov r30=cr.ipsr
        mov r29=cr.iip;;
-       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
+       tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
 (p7)   br.spnt.few dispatch_reflection ;;
        extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
        cmp.eq p7,p0=r21,r0
@@ -769,8 +763,7 @@ GLOBAL_ENTRY(fast_tlb_miss_reflect)
        cmp.eq p7,p0=r21,r0
 (p7)   br.spnt.few page_fault ;;
        // slow path if strange ipsr or isr bits set
-       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
-       cmp.ne p7,p0=r21,r0
+       tbit.nz p7,p0=r30,IA64_PSR_PP_BIT       // p7 = PP bit of r30 is set
 (p7)   br.spnt.few page_fault ;;
        movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
        and r21=r16,r21;;
@@ -1023,45 +1016,27 @@ ENTRY(hyper_rfi)
 #ifndef FAST_RFI
        br.spnt.few slow_vcpu_rfi ;;
 #endif
-       // if no interrupts pending, proceed
-       mov r30=r0
-       cmp.eq p7,p0=r20,r0
-(p7)   br.sptk.many 1f
-       ;;
-       adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
-       ld8 r21=[r20];;         // r21 = vcr.ipsr
-       extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
-       mov r30=r22;;
-       // r30 determines whether we might deliver an immediate extint
-#ifndef RFI_TO_INTERRUPT // see beginning of file
-       cmp.ne p6,p0=r30,r0
-(p6)   br.cond.spnt.few slow_vcpu_rfi ;;
-#endif
-1:
-       adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
-       ld8 r21=[r20];;         // r21 = vcr.ipsr
+       // if interrupts pending and vcr.ipsr.i=1, do it the slow way
+       adds r19=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
+       adds r23=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18
+       cmp.ne p8,p0=r20,r0;;   // evtchn_upcall_pending != 0
        // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
-       movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
-       and r22=r20,r21
-       ;;
-       cmp.ne p7,p0=r22,r20
-(p7)   br.spnt.few slow_vcpu_rfi ;;
+       ld8 r21=[r19],XSI_IIP_OFS-XSI_IPSR_OFS // r21=vcr.ipsr
+       movl r20=~(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
+       or r20=r20,r21
+       // p8 determines whether we might deliver an immediate extint
+(p8)   tbit.nz p8,p0=r21,IA64_PSR_I_BIT;;
+       cmp.ne p7,p0=-1,r20
+       ld4 r23=[r23]   // r23=metaphysical_mode
+#ifndef RFI_TO_INTERRUPT       // see beginning of file
+(p8)   br.cond.spnt.few slow_vcpu_rfi
+#endif
+(p7)   br.spnt.few slow_vcpu_rfi;;
        // if was in metaphys mode, do it the slow way (FIXME later?)
-       adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
-       ld4 r20=[r20];;
-       cmp.ne p7,p0=r20,r0
-(p7)   br.spnt.few slow_vcpu_rfi ;;
-#if 0
-       // if domain hasn't already done virtual bank switch
-       //  do it the slow way (FIXME later?)
-       adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
-       ld4 r20=[r20];;
-       cmp.eq p7,p0=r20,r0
-(p7)   br.spnt.few slow_vcpu_rfi ;;
-#endif
-       adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
-       ld8 r22=[r20];;
-1:     // OK now, let's do an rfi.
+       cmp.ne p7,p0=r23,r0
+       ld8 r22=[r19]   // r22=vcr.iip
+(p7)   br.spnt.few slow_vcpu_rfi;;
+       // OK now, let's do an rfi.
 #ifdef FAST_HYPERPRIVOP_CNT
        movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RFI);;
        ld4 r23=[r20];;
@@ -1070,8 +1045,7 @@ 1:        // OK now, let's do an rfi.
 #endif
 #ifdef RFI_TO_INTERRUPT
        // maybe do an immediate interrupt delivery?
-       cmp.ne p6,p0=r30,r0
-(p6)   br.cond.spnt.few rfi_check_extint;;
+(p8)   br.cond.spnt.few rfi_check_extint;;
 #endif
 
 just_do_rfi:
@@ -1090,15 +1064,13 @@ just_do_rfi:
        // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
        ld8 r20=[r20]
        mov r19=1 
-       extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
-       cmp.ne p7,p6=r23,r0 ;;
+       tbit.nz p7,p6=r21,IA64_PSR_I_BIT
+       tbit.nz p9,p8=r21,IA64_PSR_IC_BIT;;
        // not done yet
 (p7)   st1 [r20]=r0
-(p6)   st1 [r20]=r19;;
-       extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
-       cmp.ne p7,p6=r23,r0 ;;
-(p7)   st4 [r18]=r19;;
-(p6)   st4 [r18]=r0;;
+(p6)   st1 [r20]=r19
+(p9)   st4 [r18]=r19
+(p8)   st4 [r18]=r0
        // force on psr.ic, i, dt, rt, it, bn
        movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT| \
                  IA64_PSR_IT|IA64_PSR_BN)
@@ -1228,9 +1200,8 @@ ENTRY(rfi_check_extint)
        // r26 now contains the vector [0..255]
        adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r20=[r20] ;;
-       extr.u r28=r20,16,1
-       extr.u r29=r20,4,4 ;;
-       cmp.ne p6,p0=r28,r0     // if tpr.mmi is set, just rfi
+       extr.u r29=r20,4,4
+       tbit.nz p6,p0=r20,16    // if tpr.mmi is set, just rfi
 (p6)   br.cond.spnt.few just_do_rfi;;
        shl r29=r29,4;;
        adds r29=15,r29;;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.