[Xen-changelog] Add fast path for thash hyperprivop
# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID 89d92ce1092462f1999221d2615a9976d78bd17b
# Parent 0e774127646895866311f9f617b38577891bf9b9
Add fast path for thash hyperprivop
Add fast path for ptc.ga hyperprivop
Add fast path for iaccess/daccess reflect

diff -r 0e7741276468 -r 89d92ce10924 xen/arch/ia64/hyperprivop.S
--- a/xen/arch/ia64/hyperprivop.S	Sat Jul  9 14:36:13 2005
+++ b/xen/arch/ia64/hyperprivop.S	Sat Jul  9 14:37:13 2005
@@ -124,6 +124,10 @@
 	// HYPERPRIVOP_ITC_I?
 	cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
 (p7)	br.sptk.many hyper_itc_i;;
+
+	// HYPERPRIVOP_THASH?
+	cmp.eq p7,p6=XEN_HYPER_THASH,r17
+(p7)	br.sptk.many hyper_thash;;
 	// if not one of the above, give up for now and do it the slow way
 	br.sptk.many dispatch_break_fault ;;
@@ -440,7 +444,6 @@
 END(fast_tick_reflect)
 
 // reflect domain breaks directly to domain
-// FIXME: DOES NOT WORK YET
 // r16 == cr.isr
 // r17 == cr.iim
 // r18 == XSI_PSR_IC
@@ -471,15 +474,30 @@
 	cmp.eq p7,p0=r22,r17;
 (p7)	br.spnt.few dispatch_break_fault ;;
 #endif
-#ifdef FAST_REFLECT_CNT
-	movl r20=fast_reflect_count+((0x2c00>>8)*8);;
-	ld8 r21=[r20];;
-	adds r21=1,r21;;
-	st8 [r20]=r21;;
-#endif
+	movl r20=0x2c00;
 	// save iim in shared_info
 	adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
 	st8 [r21]=r17;;
+	// fall through
+
+
+// reflect to domain ivt+r20
+// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
+// r16 == cr.isr
+// r18 == XSI_PSR_IC
+// r20 == offset into ivt
+// r29 == iip
+// r30 == ipsr
+// r31 == pr
+ENTRY(fast_reflect)
+#ifdef FAST_REFLECT_CNT
+	movl r22=fast_reflect_count;
+	shr r23=r20,5;;
+	add r22=r22,r23;;
+	ld8 r21=[r22];;
+	adds r21=1,r21;;
+	st8 [r22]=r21;;
+#endif
 	// save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
 	adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
 	st8 [r21]=r29;;
@@ -504,9 +522,9 @@
 	or r30=r30,r28;;
 	and r30=r30,r27;;
 	// also set shared_mem ipsr.i and ipsr.ic appropriately
-	ld8 r20=[r18];;
-	extr.u r22=r20,32,32
-	cmp4.eq p6,p7=r20,r0;;
+	ld8 r24=[r18];;
+	extr.u r22=r24,32,32
+	cmp4.eq p6,p7=r24,r0;;
 (p6)	dep r30=0,r30,IA64_PSR_IC_BIT,1
 (p7)	dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
 	cmp4.eq p6,p7=r22,r0;;
@@ -520,13 +538,13 @@
 	// cover and set shared_mem precover_ifs to cr.ifs
 	// set shared_mem ifs and incomplete_regframe to 0
 	cover ;;
-	mov r20=cr.ifs;;
+	mov r24=cr.ifs;;
 	adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
 	st4 [r21]=r0 ;;
 	adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
 	st8 [r21]=r0 ;;
 	adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
-	st8 [r21]=r20 ;;
+	st8 [r21]=r24 ;;
 	// vpsr.i = vpsr.ic = 0 on delivery of interruption
 	st8 [r18]=r0;;
 	// FIXME: need to save iipa and isr to be arch-compliant
@@ -534,22 +552,30 @@
 	mov r22=IA64_KR(CURRENT);;
 	adds r22=IA64_VCPU_IVA_OFFSET,r22;;
 	ld8 r23=[r22];;
-	movl r24=0x2c00;;
-	add r24=r24,r23;;
-	mov cr.iip=r24;;
+	add r20=r20,r23;;
+	mov cr.iip=r20;;
 	// OK, now all set to go except for switch to virtual bank0
 	mov r30=r2; mov r29=r3;;
 	adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
 	adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
 	bsw.1;;
-	st8 [r2]=r16,16; st8 [r3]=r17,16 ;;
-	st8 [r2]=r18,16; st8 [r3]=r19,16 ;;
-	st8 [r2]=r20,16; st8 [r3]=r21,16 ;;
-	st8 [r2]=r22,16; st8 [r3]=r23,16 ;;
-	st8 [r2]=r24,16; st8 [r3]=r25,16 ;;
-	st8 [r2]=r26,16; st8 [r3]=r27,16 ;;
-	st8 [r2]=r28,16; st8 [r3]=r29,16 ;;
-	st8 [r2]=r30,16; st8 [r3]=r31,16 ;;
+	// FIXME: need to handle ar.unat!
+	.mem.offset 0,0; st8.spill [r2]=r16,16;
+	.mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+	.mem.offset 0,0; st8.spill [r2]=r18,16;
+	.mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+	.mem.offset 0,0; st8.spill [r2]=r20,16;
+	.mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+	.mem.offset 0,0; st8.spill [r2]=r22,16;
+	.mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+	.mem.offset 0,0; st8.spill [r2]=r24,16;
+	.mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+	.mem.offset 0,0; st8.spill [r2]=r26,16;
+	.mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+	.mem.offset 0,0; st8.spill [r2]=r28,16;
+	.mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+	.mem.offset 0,0; st8.spill [r2]=r30,16;
+	.mem.offset 8,0; st8.spill [r3]=r31,16 ;;
 	movl r31=XSI_IPSR;;
 	bsw.0 ;;
 	mov r2=r30; mov r3=r29;;
@@ -558,6 +584,41 @@
 	mov pr=r31,-1 ;;
 	rfi
 	;;
+
+// reflect access faults (0x2400,0x2800,0x5300) directly to domain
+// r16 == isr
+// r17 == ifa
+// r19 == reflect number (only pass-thru to dispatch_reflection)
+// r20 == offset into ivt
+// r31 == pr
+GLOBAL_ENTRY(fast_access_reflect)
+	mov r30=cr.ipsr;;
+	mov r29=cr.iip;;
+	extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+	cmp.ne p7,p0=r21,r0
+(p7)	br.spnt.few dispatch_reflection ;;
+	extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+	cmp.ne p7,p0=r21,r0
+(p7)	br.spnt.few dispatch_reflection ;;
+	extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
+	cmp.eq p7,p0=r21,r0
+(p7)	br.spnt.few dispatch_reflection ;;
+	movl r18=XSI_PSR_IC;;
+	ld8 r21=[r18];;
+	cmp.eq p7,p0=r0,r21
+(p7)	br.spnt.few dispatch_reflection ;;
+	// set shared_mem ifa, FIXME: should we validate it?
+	mov r17=cr.ifa;;
+	adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
+	st8 [r21]=r17 ;;
+	// get rr[ifa] and save to itir in shared memory (extra bits ignored)
+	shr.u r22=r17,61
+	adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
+	adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+	shladd r22=r22,3,r21;;
+	ld8 r22=[r22];;
+	st8 [r23]=r22;;
+	br.cond.sptk.many fast_reflect;;
 
 // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
@@ -1312,8 +1373,146 @@
 	;;
 END(hyper_set_rr)
 
+// this routine was derived from optimized assembly output from
+// vcpu_thash so it is dense and difficult to read but it works
+// On entry:
+// r18 == XSI_PSR_IC
+// r31 == pr
+GLOBAL_ENTRY(hyper_thash)
+#ifdef FAST_HYPERPRIVOP_CNT
+	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
+	ld8 r21=[r20];;
+	adds r21=1,r21;;
+	st8 [r20]=r21;;
+#endif
+	shr.u r20 = r8, 61
+	addl r25 = 1, r0
+	movl r17 = 0xe000000000000000
+	;;
+	and r21 = r17, r8		// VHPT_Addr1
+	;;
+	shladd r28 = r20, 3, r18
+	adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
+	;;
+	adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
+	addl r28 = 32767, r0
+	ld8 r24 = [r19]			// pta
+	;;
+	ld8 r23 = [r27]			// rrs[vadr>>61]
+	extr.u r26 = r24, 2, 6
+	;;
+	extr.u r22 = r23, 2, 6
+	shl r30 = r25, r26
+	;;
+	shr.u r19 = r8, r22
+	shr.u r29 = r24, 15
+	;;
+	adds r17 = -1, r30
+	;;
+	shladd r27 = r19, 3, r0
+	extr.u r26 = r17, 15, 46
+	;;
+	andcm r24 = r29, r26
+	and r19 = r28, r27
+	shr.u r25 = r27, 15
+	;;
+	and r23 = r26, r25
+	;;
+	or r22 = r24, r23
+	;;
+	dep.z r20 = r22, 15, 46
+	;;
+	or r16 = r20, r21
+	;;
+	or r8 = r19, r16
+	// done, update iip/ipsr to next instruction
+	mov r24=cr.ipsr
+	mov r25=cr.iip;;
+	extr.u r26=r24,41,2 ;;
+	cmp.eq p6,p7=2,r26 ;;
+(p6)	mov r26=0
+(p6)	adds r25=16,r25
+(p7)	adds r26=1,r26
+	;;
+	dep r24=r26,r24,41,2
+	;;
+	mov cr.ipsr=r24
+	mov cr.iip=r25
+	mov pr=r31,-1 ;;
+	rfi
+	;;
+END(hyper_thash)
+
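For readers decoding the scheduled code above: hyper_thash computes the short-format VHPT hash. A minimal C sketch of the same computation may help (the standalone function and its names are invented for illustration; field extraction follows the assembly, where pta.size and rr.ps are bits 7:2 of the PTA and region register):

#include <stdint.h>

/* Illustrative sketch of the short-format VHPT hash performed by
 * hyper_thash: keep the region bits of vadr, index by the page-shifted
 * offset, and splice the PTA base in above bit 15 under the size mask. */
static uint64_t thash_sketch(uint64_t vadr, uint64_t pta, uint64_t rr)
{
	uint64_t region   = vadr & 0xe000000000000000UL;   /* vadr{63:61} */
	uint64_t pta_size = (pta >> 2) & 0x3f;             /* pta.size */
	uint64_t rr_ps    = (rr >> 2) & 0x3f;              /* rr.ps */
	uint64_t off      = (vadr >> rr_ps) << 3;          /* 8-byte PTE index */
	uint64_t mask     = ((1UL << pta_size) - 1) >> 15; /* size mask above bit 15 */
	uint64_t hi       = ((pta >> 15) & ~mask) | ((off >> 15) & mask);

	hi = (hi & ((1UL << 46) - 1)) << 15;               /* dep.z r20=r22,15,46 */
	return region | hi | (off & 0x7fff);               /* low 15 bits from off */
}

The epilogue then advances cr.iip/ipsr.ri to the next instruction slot, wrapping to the next 16-byte bundle after slot 2; hyper_ptc_ga below finishes with the same sequence.
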
 ENTRY(hyper_ptc_ga)
-	br.spnt.many dispatch_break_fault ;;
+#ifdef CONFIG_SMP
+FIXME: ptc.ga instruction requires spinlock for SMP
+#endif
+	// FIXME: validate not flushing Xen addresses
+#ifdef FAST_HYPERPRIVOP_CNT
+	movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
+	ld8 r21=[r20];;
+	adds r21=1,r21;;
+	st8 [r20]=r21;;
+#endif
+	mov r28=r8
+	extr.u r19=r9,2,6		// addr_range=1<<((r9&0xfc)>>2)
+	mov r20=1
+	shr.u r24=r8,61
+	addl r27=56,r0			// PAGE_SHIFT<<2 (for ptc.ga)
+	movl r26=0x8000000000000000	// INVALID_TI_TAG
+	mov r30=ar.lc
+	;;
+	shl r19=r20,r19
+	cmp.eq p7,p0=7,r24
+(p7)	br.spnt.many dispatch_break_fault ;;	// slow way for rr7
+	;;
+	cmp.le p7,p0=r19,r0		// skip flush if size<=0
+(p7)	br.cond.dpnt 2f ;;
+	extr.u r24=r19,0,PAGE_SHIFT
+	shr.u r23=r19,PAGE_SHIFT ;;	// repeat loop for n pages
+	cmp.ne p7,p0=r24,r0 ;;
+(p7)	adds r23=1,r23 ;;		// n_pages<size<n_pages+1? extra iter
+	mov ar.lc=r23
+	movl r29=PAGE_SIZE;;
+1:
+	thash r25=r28 ;;
+	adds r25=16,r25 ;;
+	ld8 r24=[r25] ;;
+	// FIXME: should check if tag matches, not just blow it away
+	or r24=r26,r24 ;;		// vhpt_entry->ti_tag = 1
+	st8 [r25]=r24
+	ptc.ga r28,r27 ;;
+	srlz.i ;;
+	add r28=r29,r28
+	br.cloop.sptk.few 1b
+	;;
+2:
+	mov ar.lc=r30 ;;
+	mov r29=cr.ipsr
+	mov r30=cr.iip;;
+	mov r27=IA64_KR(CURRENT);;
+	adds r25=IA64_VCPU_DTLB_OFFSET,r27
+	adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
+	ld8 r24=[r25]
+	ld8 r27=[r26] ;;
+	and r24=-2,r24
+	and r27=-2,r27 ;;
+	st8 [r25]=r24			// set 1-entry i/dtlb as not present
+	st8 [r26]=r27 ;;
+	// increment to point to next instruction
+	extr.u r26=r29,41,2 ;;
+	cmp.eq p6,p7=2,r26 ;;
+(p6)	mov r26=0
+(p6)	adds r30=16,r30
+(p7)	adds r26=1,r26
+	;;
+	dep r29=r26,r29,41,2
+	;;
+	mov cr.ipsr=r29
+	mov cr.iip=r30
+	mov pr=r31,-1 ;;
+	rfi
+	;;
 END(hyper_ptc_ga)
 
 ENTRY(hyper_itc_d)
@@ -1323,5 +1522,3 @@
 ENTRY(hyper_itc_i)
 	br.spnt.many dispatch_break_fault ;;
 END(hyper_itc_i)
-
-// ignore me
diff -r 0e7741276468 -r 89d92ce10924 xen/arch/ia64/ivt.S
--- a/xen/arch/ia64/ivt.S	Sat Jul  9 14:36:13 2005
+++ b/xen/arch/ia64/ivt.S	Sat Jul  9 14:37:13 2005
@@ -666,7 +666,12 @@
 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
 ENTRY(iaccess_bit)
 #ifdef XEN
-	REFLECT(9)
+	mov r31=pr;
+	mov r16=cr.isr
+	mov r17=cr.ifa
+	mov r19=9
+	movl r20=0x2400
+	br.sptk.many fast_access_reflect;;
 #endif
 	DBG_FAULT(9)
 	// Like Entry 8, except for instruction access
@@ -734,7 +739,12 @@
 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
 ENTRY(daccess_bit)
 #ifdef XEN
-	REFLECT(10)
+	mov r31=pr;
+	mov r16=cr.isr
+	mov r17=cr.ifa
+	mov r19=10
+	movl r20=0x2800
+	br.sptk.many fast_access_reflect;;
 #endif
 	DBG_FAULT(10)
 	// Like Entry 8, except for data access
@@ -1395,7 +1405,12 @@
 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
 ENTRY(daccess_rights)
 #ifdef XEN
-	REFLECT(23)
+	mov r31=pr;
+	mov r16=cr.isr
+	mov r17=cr.ifa
+	mov r19=23
+	movl r20=0x5300
+	br.sptk.many fast_access_reflect;;
 #endif
 	DBG_FAULT(23)
 	mov r16=cr.ifa
@@ -1821,7 +1836,7 @@
 #ifdef XEN
 	.org ia64_ivt+0x8000
-ENTRY(dispatch_reflection)
+GLOBAL_ENTRY(dispatch_reflection)
 	/*
 	 * Input:
 	 *	psr.ic:	off
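The three rewired ivt.S entries above all funnel into fast_access_reflect, which gives up and branches to dispatch_reflection unless the fault can be reflected without building a full trap frame. A rough C rendering of that entry filter (the function is invented for illustration; the PSR bit positions are the architectural values behind the macros the assembly uses):

#include <stdint.h>

#define IA64_PSR_BE_BIT		1	/* big-endian */
#define IA64_PSR_PP_BIT		21	/* privileged perf-mon enable */
#define IA64_PSR_CPL0_BIT	32	/* low bit of the 2-bit cpl field */

/* Illustrative: take the fast path only when all four checks pass;
 * otherwise fall back to the slow dispatch_reflection path. */
static int can_fast_reflect(uint64_t ipsr, uint64_t vpsr_ic)
{
	if ((ipsr >> IA64_PSR_BE_BIT) & 1)		/* psr.be set */
		return 0;
	if ((ipsr >> IA64_PSR_PP_BIT) & 1)		/* psr.pp set */
		return 0;
	if (((ipsr >> IA64_PSR_CPL0_BIT) & 3) == 0)	/* fault taken at cpl0 */
		return 0;
	if (vpsr_ic == 0)				/* guest's virtual psr.ic off */
		return 0;
	return 1;
}

On the fast path the handler stashes cr.ifa and the faulting region's rr value (as a stand-in itir) in shared memory, then falls into fast_reflect with r20 holding the vector offset.
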
diff -r 0e7741276468 -r 89d92ce10924 xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c	Sat Jul  9 14:36:13 2005
+++ b/xen/arch/ia64/asm-offsets.c	Sat Jul  9 14:37:13 2005
@@ -46,6 +46,8 @@
 	DEFINE(XSI_PSR_IC, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.interrupt_collection_enabled)));
 	DEFINE(XSI_PSR_I_OFS, offsetof(vcpu_info_t, arch.interrupt_delivery_enabled));
 	DEFINE(XSI_IIP_OFS, offsetof(vcpu_info_t, arch.iip));
+	DEFINE(XSI_IFA_OFS, offsetof(vcpu_info_t, arch.ifa));
+	DEFINE(XSI_ITIR_OFS, offsetof(vcpu_info_t, arch.itir));
 	DEFINE(XSI_IPSR, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.ipsr)));
 	DEFINE(XSI_IPSR_OFS, offsetof(vcpu_info_t, arch.ipsr));
 	DEFINE(XSI_IFS_OFS, offsetof(vcpu_info_t, arch.ifs));
@@ -61,6 +63,7 @@
 	DEFINE(XSI_PEND_OFS, offsetof(vcpu_info_t, arch.pending_interruption));
 	DEFINE(XSI_RR0_OFS, offsetof(vcpu_info_t, arch.rrs[0]));
 	DEFINE(XSI_TPR_OFS, offsetof(vcpu_info_t, arch.tpr));
+	DEFINE(XSI_PTA_OFS, offsetof (vcpu_info_t, arch.pta));
 	DEFINE(XSI_ITV_OFS, offsetof(vcpu_info_t, arch.itv));
 	//DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
 	//DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
@@ -85,6 +88,8 @@
 	DEFINE(IA64_VCPU_ENDING_RID_OFFSET, offsetof (struct vcpu, arch.ending_rid));
 	DEFINE(IA64_VCPU_DOMAIN_ITM_OFFSET, offsetof (struct vcpu, arch.domain_itm));
 	DEFINE(IA64_VCPU_DOMAIN_ITM_LAST_OFFSET, offsetof (struct vcpu, arch.domain_itm_last));
+	DEFINE(IA64_VCPU_ITLB_OFFSET, offsetof (struct vcpu, arch.itlb));
+	DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
 	BLANK();
 	DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, itm_next));

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog