[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Add fast path for thash hyperprivop



# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID 89d92ce1092462f1999221d2615a9976d78bd17b
# Parent  0e774127646895866311f9f617b38577891bf9b9

Add fast path for thash hyperprivop
Add fast path for ptc.ga hyperprivop
Add fast path for iaccess/daccess reflect

diff -r 0e7741276468 -r 89d92ce10924 xen/arch/ia64/hyperprivop.S
--- a/xen/arch/ia64/hyperprivop.S       Sat Jul  9 14:36:13 2005
+++ b/xen/arch/ia64/hyperprivop.S       Sat Jul  9 14:37:13 2005
@@ -124,6 +124,10 @@
        // HYPERPRIVOP_ITC_I?
        cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
 (p7)   br.sptk.many hyper_itc_i;;
+
+       // HYPERPRIVOP_THASH?
+       cmp.eq p7,p6=XEN_HYPER_THASH,r17
+(p7)   br.sptk.many hyper_thash;;
 
        // if not one of the above, give up for now and do it the slow way
        br.sptk.many dispatch_break_fault ;;
@@ -440,7 +444,6 @@
 END(fast_tick_reflect)
 
 // reflect domain breaks directly to domain
-// FIXME: DOES NOT WORK YET
 //     r16 == cr.isr
 //     r17 == cr.iim
 //     r18 == XSI_PSR_IC
@@ -471,15 +474,30 @@
        cmp.eq p7,p0=r22,r17;
 (p7)   br.spnt.few dispatch_break_fault ;;
 #endif
-#ifdef FAST_REFLECT_CNT
-       movl r20=fast_reflect_count+((0x2c00>>8)*8);;
-       ld8 r21=[r20];;
-       adds r21=1,r21;;
-       st8 [r20]=r21;;
-#endif
+       movl r20=0x2c00;
        // save iim in shared_info
        adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r17;;
+       // fall through
+
+
+// reflect to domain ivt+r20
+// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
+//     r16 == cr.isr
+//     r18 == XSI_PSR_IC
+//     r20 == offset into ivt
+//     r29 == iip
+//     r30 == ipsr
+//     r31 == pr
+ENTRY(fast_reflect)
+#ifdef FAST_REFLECT_CNT
+       movl r22=fast_reflect_count;
+       shr r23=r20,5;;
+       add r22=r22,r23;;
+       ld8 r21=[r22];;
+       adds r21=1,r21;;
+       st8 [r22]=r21;;
+#endif
        // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
        adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r29;;
@@ -504,9 +522,9 @@
        or r30=r30,r28;;
        and r30=r30,r27;;
        // also set shared_mem ipsr.i and ipsr.ic appropriately
-       ld8 r20=[r18];;
-       extr.u r22=r20,32,32
-       cmp4.eq p6,p7=r20,r0;;
+       ld8 r24=[r18];;
+       extr.u r22=r24,32,32
+       cmp4.eq p6,p7=r24,r0;;
 (p6)   dep r30=0,r30,IA64_PSR_IC_BIT,1
 (p7)   dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
        cmp4.eq p6,p7=r22,r0;;
@@ -520,13 +538,13 @@
        // cover and set shared_mem precover_ifs to cr.ifs
        // set shared_mem ifs and incomplete_regframe to 0
        cover ;;
-       mov r20=cr.ifs;;
+       mov r24=cr.ifs;;
        adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
        st4 [r21]=r0 ;;
        adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r0 ;;
        adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
-       st8 [r21]=r20 ;;
+       st8 [r21]=r24 ;;
        // vpsr.i = vpsr.ic = 0 on delivery of interruption
        st8 [r18]=r0;;
        // FIXME: need to save iipa and isr to be arch-compliant
@@ -534,22 +552,30 @@
        mov r22=IA64_KR(CURRENT);;
        adds r22=IA64_VCPU_IVA_OFFSET,r22;;
        ld8 r23=[r22];;
-       movl r24=0x2c00;;
-       add r24=r24,r23;;
-       mov cr.iip=r24;;
+       add r20=r20,r23;;
+       mov cr.iip=r20;;
        // OK, now all set to go except for switch to virtual bank0
        mov r30=r2; mov r29=r3;;
        adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
        adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
        bsw.1;;
-       st8 [r2]=r16,16; st8 [r3]=r17,16 ;;
-       st8 [r2]=r18,16; st8 [r3]=r19,16 ;;
-       st8 [r2]=r20,16; st8 [r3]=r21,16 ;;
-       st8 [r2]=r22,16; st8 [r3]=r23,16 ;;
-       st8 [r2]=r24,16; st8 [r3]=r25,16 ;;
-       st8 [r2]=r26,16; st8 [r3]=r27,16 ;;
-       st8 [r2]=r28,16; st8 [r3]=r29,16 ;;
-       st8 [r2]=r30,16; st8 [r3]=r31,16 ;;
+       // FIXME: need to handle ar.unat!
+       .mem.offset 0,0; st8.spill [r2]=r16,16;
+       .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r18,16;
+       .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r20,16;
+       .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r22,16;
+       .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r24,16;
+       .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r26,16;
+       .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r28,16;
+       .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r30,16;
+       .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
        movl r31=XSI_IPSR;;
        bsw.0 ;;
        mov r2=r30; mov r3=r29;;
@@ -558,6 +584,41 @@
        mov pr=r31,-1 ;;
        rfi
        ;;
+
+// reflect access faults (0x2400,0x2800,0x5300) directly to domain
+//     r16 == isr
+//     r17 == ifa
+//     r19 == reflect number (only pass-thru to dispatch_reflection)
+//     r20 == offset into ivt
+//     r31 == pr
+GLOBAL_ENTRY(fast_access_reflect)
+       mov r30=cr.ipsr;;
+       mov r29=cr.iip;;
+       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
+       cmp.eq p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       movl r18=XSI_PSR_IC;;
+       ld8 r21=[r18];;
+       cmp.eq p7,p0=r0,r21
+(p7)   br.spnt.few dispatch_reflection ;;
+       // set shared_mem ifa, FIXME: should we validate it?
+       mov r17=cr.ifa;;
+       adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; 
+       st8 [r21]=r17 ;;
+       // get rr[ifa] and save to itir in shared memory (extra bits ignored)
+       shr.u r22=r17,61
+       adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 
+       adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+       shladd r22=r22,3,r21;;
+       ld8 r22=[r22];;
+       st8 [r23]=r22;;
+       br.cond.sptk.many fast_reflect;;
 
 
 // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
@@ -1312,8 +1373,146 @@
        ;;
 END(hyper_set_rr)
 
+// this routine was derived from optimized assembly output from
+// vcpu_thash so it is dense and difficult to read but it works
+// On entry:
+//     r18 == XSI_PSR_IC
+//     r31 == pr
+GLOBAL_ENTRY(hyper_thash)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       shr.u r20 = r8, 61
+       addl r25 = 1, r0
+       movl r17 = 0xe000000000000000
+       ;;
+       and r21 = r17, r8               // VHPT_Addr1
+       ;;
+       shladd r28 = r20, 3, r18
+       adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
+       ;;
+       adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
+       addl r28 = 32767, r0
+       ld8 r24 = [r19]                 // pta
+       ;;
+       ld8 r23 = [r27]                 // rrs[vadr>>61]
+       extr.u r26 = r24, 2, 6
+       ;;
+       extr.u r22 = r23, 2, 6
+       shl r30 = r25, r26
+       ;;
+       shr.u r19 = r8, r22
+       shr.u r29 = r24, 15
+       ;;
+       adds r17 = -1, r30
+       ;;
+       shladd r27 = r19, 3, r0
+       extr.u r26 = r17, 15, 46
+       ;;
+       andcm r24 = r29, r26
+       and r19 = r28, r27
+       shr.u r25 = r27, 15
+       ;;
+       and r23 = r26, r25
+       ;;
+       or r22 = r24, r23
+       ;;
+       dep.z r20 = r22, 15, 46
+       ;;
+       or r16 = r20, r21
+       ;;
+       or r8 = r19, r16
+       // done, update iip/ipsr to next instruction
+       mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_thash)
+
 ENTRY(hyper_ptc_ga)
-       br.spnt.many dispatch_break_fault ;;
+#ifdef CONFIG_SMP
+FIXME: ptc.ga instruction requires spinlock for SMP
+#endif
+       // FIXME: validate not flushing Xen addresses
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       mov r28=r8
+       extr.u r19=r9,2,6               // addr_range=1<<((r9&0xfc)>>2)
+       mov r20=1
+       shr.u r24=r8,61
+       addl r27=56,r0                  // PAGE_SHIFT<<2 (for ptc.ga)
+       movl r26=0x8000000000000000     // INVALID_TI_TAG
+       mov r30=ar.lc
+       ;;
+       shl r19=r20,r19
+       cmp.eq p7,p0=7,r24
+(p7)   br.spnt.many dispatch_break_fault ;;    // slow way for rr7
+       ;;
+       cmp.le p7,p0=r19,r0             // skip flush if size<=0
+(p7)   br.cond.dpnt 2f ;;
+       extr.u r24=r19,0,PAGE_SHIFT
+       shr.u r23=r19,PAGE_SHIFT ;;     // repeat loop for n pages
+       cmp.ne p7,p0=r24,r0 ;;
+(p7)   adds r23=1,r23 ;;               // n_pages<size<n_pages+1? extra iter
+       mov ar.lc=r23
+       movl r29=PAGE_SIZE;;
+1:
+       thash r25=r28 ;;
+       adds r25=16,r25 ;;
+       ld8 r24=[r25] ;;
+       // FIXME: should check if tag matches, not just blow it away
+       or r24=r26,r24 ;;               // vhpt_entry->ti_tag = 1
+       st8 [r25]=r24
+       ptc.ga r28,r27 ;;
+       srlz.i ;;
+       add r28=r29,r28
+       br.cloop.sptk.few 1b
+       ;;
+2:
+       mov ar.lc=r30 ;;
+       mov r29=cr.ipsr
+       mov r30=cr.iip;;
+       mov r27=IA64_KR(CURRENT);;
+       adds r25=IA64_VCPU_DTLB_OFFSET,r27
+       adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
+       ld8 r24=[r25]
+       ld8 r27=[r26] ;;
+       and r24=-2,r24
+       and r27=-2,r27 ;;
+       st8 [r25]=r24                   // set 1-entry i/dtlb as not present
+       st8 [r26]=r27 ;;
+       // increment to point to next instruction
+       extr.u r26=r29,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r30=16,r30
+(p7)   adds r26=1,r26
+       ;;
+       dep r29=r26,r29,41,2
+       ;;
+       mov cr.ipsr=r29
+       mov cr.iip=r30
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
 END(hyper_ptc_ga)
 
 ENTRY(hyper_itc_d)
@@ -1323,5 +1522,3 @@
 ENTRY(hyper_itc_i)
        br.spnt.many dispatch_break_fault ;;
 END(hyper_itc_i)
-
-// ignore me
diff -r 0e7741276468 -r 89d92ce10924 xen/arch/ia64/ivt.S
--- a/xen/arch/ia64/ivt.S       Sat Jul  9 14:36:13 2005
+++ b/xen/arch/ia64/ivt.S       Sat Jul  9 14:37:13 2005
@@ -666,7 +666,12 @@
 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
 ENTRY(iaccess_bit)
 #ifdef XEN
-       REFLECT(9)
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=9
+       movl r20=0x2400
+       br.sptk.many fast_access_reflect;;
 #endif
        DBG_FAULT(9)
        // Like Entry 8, except for instruction access
@@ -734,7 +739,12 @@
 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
 ENTRY(daccess_bit)
 #ifdef XEN
-       REFLECT(10)
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=10
+       movl r20=0x2800
+       br.sptk.many fast_access_reflect;;
 #endif
        DBG_FAULT(10)
        // Like Entry 8, except for data access
@@ -1395,7 +1405,12 @@
 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
 ENTRY(daccess_rights)
 #ifdef XEN
-       REFLECT(23)
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=23
+       movl r20=0x5300
+       br.sptk.many fast_access_reflect;;
 #endif
        DBG_FAULT(23)
        mov r16=cr.ifa
@@ -1821,7 +1836,7 @@
 
 #ifdef XEN
        .org ia64_ivt+0x8000
-ENTRY(dispatch_reflection)
+GLOBAL_ENTRY(dispatch_reflection)
        /*
         * Input:
         *      psr.ic: off
diff -r 0e7741276468 -r 89d92ce10924 xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Sat Jul  9 14:36:13 2005
+++ b/xen/arch/ia64/asm-offsets.c       Sat Jul  9 14:37:13 2005
@@ -46,6 +46,8 @@
        DEFINE(XSI_PSR_IC, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, 
arch.interrupt_collection_enabled)));
        DEFINE(XSI_PSR_I_OFS, offsetof(vcpu_info_t, 
arch.interrupt_delivery_enabled));
        DEFINE(XSI_IIP_OFS, offsetof(vcpu_info_t, arch.iip));
+       DEFINE(XSI_IFA_OFS, offsetof(vcpu_info_t, arch.ifa));
+       DEFINE(XSI_ITIR_OFS, offsetof(vcpu_info_t, arch.itir));
        DEFINE(XSI_IPSR, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.ipsr)));
        DEFINE(XSI_IPSR_OFS, offsetof(vcpu_info_t, arch.ipsr));
        DEFINE(XSI_IFS_OFS, offsetof(vcpu_info_t, arch.ifs));
@@ -61,6 +63,7 @@
        DEFINE(XSI_PEND_OFS, offsetof(vcpu_info_t, arch.pending_interruption));
        DEFINE(XSI_RR0_OFS, offsetof(vcpu_info_t, arch.rrs[0]));
        DEFINE(XSI_TPR_OFS, offsetof(vcpu_info_t, arch.tpr));
+       DEFINE(XSI_PTA_OFS, offsetof (vcpu_info_t, arch.pta));
        DEFINE(XSI_ITV_OFS, offsetof(vcpu_info_t, arch.itv));
        //DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, 
blocked));
        //DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, 
clear_child_tid));
@@ -85,6 +88,8 @@
        DEFINE(IA64_VCPU_ENDING_RID_OFFSET, offsetof (struct vcpu, 
arch.ending_rid));
        DEFINE(IA64_VCPU_DOMAIN_ITM_OFFSET, offsetof (struct vcpu, 
arch.domain_itm));
        DEFINE(IA64_VCPU_DOMAIN_ITM_LAST_OFFSET, offsetof (struct vcpu, 
arch.domain_itm_last));
+       DEFINE(IA64_VCPU_ITLB_OFFSET, offsetof (struct vcpu, arch.itlb));
+       DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
 
        BLANK();
        DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, 
itm_next));

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted by RackSpace, which monitors our
servers 24x7x365 and backs them with RackSpace's Fanatical Support®.