[Xen-ia64-devel] [PATCH] unify vtlb and vhpt

Hi,

Currently an HVM domain has a separate vtlb and vhpt. This patch
unifies them: a vtlb entry is now recorded in the vhpt collision
chain area, tagged with a reserved ITIR bit. This:

- improves flexibility. The vtlb size is currently fixed, but some
  applications such as ia32el consume many vtlb entries.
- makes better use of the vhpt collision chain area, which is
  sparsely populated.
- reduces TLB misses on access to a vtlb entry, since the vhpt is
  mapped by a TR.
- speeds up ptc.e emulation slightly.

On the other hand, there is a slight overhead when searching for a
TLB entry, since vtlb and vhpt entries now share the collision
chains. In my testing, no performance degradation was observed.
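To sketch the idea in C (an illustration only, not part of the patch:
the struct and helper names below are made up and the types are
simplified stand-ins for the real thash_data_t, but the constants and
the match logic mirror the vmmu.h changes further down):

/* Illustrative sketch of the unified-chain lookup. A reserved ITIR
 * bit marks entries that belong to the guest's vtlb; vhpt entries
 * live in the same collision chain but have this bit clear. */
#define ITIR_VTLB_BIT  1                        /* reserved field in the ITIR */
#define ITIR_VTLB      (1UL << ITIR_VTLB_BIT)
#define ITIR_PS_MASK   (((1UL << 6) - 1) << 2)  /* page-size field, ITIR bits 2..7 */

struct vte {                  /* simplified stand-in, not the real thash_data_t */
    unsigned long itir;       /* insertion attributes; bit 1 doubles as vtlb marker */
    unsigned long etag;       /* hash tag */
    struct vte *next;         /* collision chain */
};

/* Walk one collision chain looking for a guest vtlb entry: the tag
 * and page size must match, and ITIR_VTLB must be set, so a vhpt
 * entry whose tag happens to collide is rejected in one compare. */
static struct vte *vtlb_chain_lookup(struct vte *head,
                                     unsigned long tag, unsigned long ps)
{
    struct vte *e;

    for (e = head->next; e != NULL; e = e->next)
        if (e->etag == tag &&
            (e->itir & (ITIR_PS_MASK | ITIR_VTLB)) == ((ps << 2) | ITIR_VTLB))
            return e;
    return NULL;
}

The ITLB/DTLB miss handlers in vmx_ivt.S make the corresponding check
in assembly: they read the entry's itir and test ITIR_VTLB_BIT, so a
guest vtlb entry found in the chain is never inserted into the
machine TLB.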
Thanks,
Kouya

Signed-off-by: Kouya Shimura <kouya@xxxxxxxxxxxxxx>

diff -r daf39fc8038a xen/arch/ia64/vmx/vmmu.c
--- a/xen/arch/ia64/vmx/vmmu.c	Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vmmu.c	Thu Feb 28 15:50:36 2008 +0900
@@ -24,20 +24,7 @@
 #include <xen/sched-if.h>
 #include <asm/vhpt.h>
 
-static int default_vtlb_sz = DEFAULT_VTLB_SZ;
 static int default_vhpt_sz = DEFAULT_VHPT_SZ;
-
-static void __init parse_vtlb_size(char *s)
-{
-    int sz = parse_size_and_unit(s, NULL);
-
-    if (sz > 0) {
-        default_vtlb_sz = fls(sz - 1);
-        /* minimum 16KB (for tag uniqueness) */
-        if (default_vtlb_sz < 14)
-            default_vtlb_sz = 14;
-    }
-}
 
 static void __init parse_vhpt_size(char *s)
 {
@@ -48,7 +35,6 @@ static void __init parse_vhpt_size(char
     }
 }
 
-custom_param("vti_vtlb_size", parse_vtlb_size);
 custom_param("vti_vhpt_size", parse_vhpt_size);
 
 
@@ -82,7 +68,6 @@ int init_domain_tlb(struct vcpu *v)
     if (rc)
         return rc;
 
-    rc = thash_alloc(&(v->arch.vtlb), default_vtlb_sz, "vtlb");
     if (rc) {
         free_domain_vhpt(v);
         return rc;
@@ -94,9 +79,6 @@ int init_domain_tlb(struct vcpu *v)
 
 void free_domain_tlb(struct vcpu *v)
 {
-    if (v->arch.vtlb.hash)
-        thash_free(&(v->arch.vtlb));
-
     free_domain_vhpt(v);
 }
 
@@ -164,8 +146,6 @@ fetch_code(VCPU *vcpu, u64 gip, IA64_BUN
     } else {
         tlb = vtlb_lookup(vcpu, gip, ISIDE_TLB);
-//        if( tlb == NULL )
-//             tlb = vtlb_lookup(vcpu, gip, DSIDE_TLB );
         if (tlb)
             gpip = (tlb->ppn >>(tlb->ps-12)<<tlb->ps) | ( gip & (PSIZE(tlb->ps)-1) );
     }
 
diff -r daf39fc8038a xen/arch/ia64/vmx/vmx_fault.c
--- a/xen/arch/ia64/vmx/vmx_fault.c	Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vmx_fault.c	Thu Feb 28 14:17:10 2008 +0900
@@ -392,7 +392,8 @@ try_again:
             return IA64_FAULT;
         }
     }
 
-    thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
+    thash_vhpt_insert(v, data->page_flags, data->itir & ~ITIR_VTLB,
+                      vadr, type);
     return IA64_NO_FAULT;
 }
diff -r daf39fc8038a xen/arch/ia64/vmx/vmx_ivt.S
--- a/xen/arch/ia64/vmx/vmx_ivt.S	Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vmx_ivt.S	Thu Feb 28 16:19:06 2008 +0900
@@ -58,6 +58,7 @@
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/vhpt.h>
+#include <asm/vmmu.h>
 #include <asm/virt_event.h>
 #include <asm/vmx_phy_mode.h>
 #include <xen/errno.h>
@@ -169,13 +170,15 @@ vmx_itlb_loop:
     adds r16 = VLE_TITAG_OFFSET, r17
     adds r19 = VLE_CCHAIN_OFFSET, r17
     ;;
-    ld8 r24 = [r16]            // Read tag
+    ld8 r24 = [r16],VLE_ITIR_OFFSET-VLE_TITAG_OFFSET    // Read tag
     ld8 r23 = [r19]            // Read chain
     ;;
+    ld8 r19 = [r16],VLE_TITAG_OFFSET-VLE_ITIR_OFFSET    // Read itir
     lfetch [r23]
-    cmp.eq p6,p7 = r20, r24    // does tag match ?
+    cmp.eq p0,p7 = r20, r24    // does tag match ?
     ;;
 (p7)mov r17 = r23;             // No: entry = chain
+    tbit.nz p6,p0 = r19, ITIR_VTLB_BIT  // vtlb?
 (p7)br.sptk vmx_itlb_loop      // again
     ;;
     // Swap the first entry with the entry found in the collision chain
@@ -183,6 +186,7 @@ vmx_itlb_loop:
     // In comments 1 stands for the first entry and 2 for the found entry.
     ld8 r29 = [r28]            // Read tag of 1
     dep r22 = -1,r24,63,1      // set ti=1 of 2 (to disable it during the swap)
+(p6)br.sptk vmx_itlb_loop      // again
     ;;
     ld8 r25 = [r17]            // Read value of 2
     ld8 r27 = [r18]            // Read value of 1
@@ -190,13 +194,12 @@ vmx_itlb_loop:
     st8 [r28] = r22, VLE_ITIR_OFFSET - VLE_TITAG_OFFSET // Write tag of 1
     mf
     ;;
-    ld8 r29 = [r16]            // read itir of 2
     ld8 r22 = [r28]            // read itir of 1
     st8 [r18] = r25            // Write value of 1
     st8 [r17] = r27            // Write value of 2
     ;;
     st8 [r16] = r22            // Write itir of 2
-    st8 [r28] = r29, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET // write itir of 1
+    st8 [r28] = r19, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET // write itir of 1
     ;;
     st8.rel [r28] = r24        // Write tag of 1 (with ti=0)
     // Insert the translation entry
@@ -252,17 +255,20 @@ vmx_dtlb_loop:
     adds r16 = VLE_TITAG_OFFSET, r17
     adds r19 = VLE_CCHAIN_OFFSET, r17
     ;;
-    ld8 r24 = [r16]
-    ld8 r23 = [r19]
-    ;;
+    ld8 r24 = [r16],VLE_ITIR_OFFSET-VLE_TITAG_OFFSET    // Read tag
+    ld8 r23 = [r19]                                     // Read chain
+    ;;
+    ld8 r19 = [r16],VLE_TITAG_OFFSET-VLE_ITIR_OFFSET    // Read itir
     lfetch [r23]
-    cmp.eq p6,p7 = r20, r24
+    cmp.eq p0,p7 = r20, r24
     ;;
 (p7)mov r17 = r23;
+    tbit.nz p6,p0 = r19, ITIR_VTLB_BIT  // vtlb?
 (p7)br.sptk vmx_dtlb_loop
     ;;
     ld8 r29 = [r28]
     dep r22 = -1,r24,63,1  //set ti=1
+(p6)br.sptk vmx_dtlb_loop
     ;;
     ld8 r25 = [r17]
     ld8 r27 = [r18]
@@ -270,13 +276,12 @@ vmx_dtlb_loop:
     st8 [r28] = r22, VLE_ITIR_OFFSET - VLE_TITAG_OFFSET
     mf
     ;;
-    ld8 r29 = [r16]
     ld8 r22 = [r28]
     st8 [r18] = r25
     st8 [r17] = r27
     ;;
     st8 [r16] = r22
-    st8 [r28] = r29, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET
+    st8 [r28] = r19, VLE_TITAG_OFFSET - VLE_ITIR_OFFSET
     ;;
     st8.rel [r28] = r24
     itc.d r25
diff -r daf39fc8038a xen/arch/ia64/vmx/vmx_virt.c
--- a/xen/arch/ia64/vmx/vmx_virt.c	Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vmx_virt.c	Thu Feb 28 11:57:11 2008 +0900
@@ -1418,10 +1418,6 @@ vmx_emulate(VCPU *vcpu, REGS *regs)
     cause = VMX(vcpu,cause);
     opcode = VMX(vcpu,opcode);
 
-#ifdef VTLB_DEBUG
-    check_vtlb_sanity(vmx_vcpu_get_vtlb(vcpu));
-    dump_vtlb(vmx_vcpu_get_vtlb(vcpu));
-#endif
 #if 0
     if ( (cause == 0xff && opcode == 0x1e000000000) || cause == 0 ) {
         printk ("VMAL decode error: cause - %lx; op - %lx\n",
diff -r daf39fc8038a xen/arch/ia64/vmx/vtlb.c
--- a/xen/arch/ia64/vmx/vtlb.c	Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/arch/ia64/vmx/vtlb.c	Thu Feb 28 15:20:01 2008 +0900
@@ -178,7 +178,7 @@ void thash_vhpt_insert(VCPU *v, u64 pte,
     mrr.rrval = ia64_get_rr(va);
 
     if (itir_ps(itir) >= mrr.ps && VMX_MMU_MODE(v) != VMX_MMU_PHY_D) {
-        vmx_vhpt_insert(vcpu_get_vhpt(v), phy_pte, itir, va);
+        vmx_vhpt_insert(&v->arch.vhpt, phy_pte, itir, va);
     } else {
         if (VMX_MMU_MODE(v) == VMX_MMU_PHY_D)
             itir = (itir & ~RR_PS_MASK) | (mrr.rrval & RR_PS_MASK);
@@ -309,7 +309,7 @@ static void vtlb_purge(VCPU *v, u64 va,
     thash_data_t *cur;
     u64 start, curadr, size, psbits, tag, rr_ps, num;
     ia64_rr vrr;
-    thash_cb_t *hcb = &v->arch.vtlb;
+    thash_cb_t *hcb = &v->arch.vhpt;
 
     vcpu_get_rr(v, va, &vrr.rrval);
     psbits = VMX(v, psbits[(va >> 61)]);
@@ -323,10 +323,9 @@ static void vtlb_purge(VCPU *v, u64 va,
         vrr.ps = rr_ps;
         while (num) {
             cur = vtlb_thash(hcb->pta, curadr, vrr.rrval, &tag);
-            while (cur) {
-                if (cur->etag == tag && cur->ps == rr_ps)
+            for (cur = cur->next; cur; cur = cur->next) {
+                if (THASH_MATCH_VTLB(cur, tag, rr_ps))
                     cur->etag = 1UL << 63;
-                cur = cur->next;
             }
             curadr += size;
             num--;
@@ -353,7 +352,7 @@ static void vhpt_purge(VCPU *v, u64 va,
         cur = (thash_data_t *)ia64_thash(start);
         tag = ia64_ttag(start);
         while (cur) {
-            if (cur->etag == tag)
+            if (cur->etag == tag) // && cur->itir & ITIR_VTLB == 0
                 cur->etag = 1UL << 63;
             cur = cur->next;
         }
@@ -407,11 +406,9 @@ static void vtlb_insert(VCPU *v, u64 pte
 
 static void vtlb_insert(VCPU *v, u64 pte, u64 itir, u64 va)
 {
     thash_data_t *hash_table, *cch, *tail;
-    /* int flag; */
     ia64_rr vrr;
-    /* u64 gppn, ppns, ppne; */
     u64 tag, len;
-    thash_cb_t *hcb = &v->arch.vtlb;
+    thash_cb_t *hcb = &v->arch.vhpt;
 
     vcpu_quick_region_set(PSCBX(v, tc_regions), va);
@@ -420,18 +417,16 @@ static void vtlb_insert(VCPU *v, u64 pte
     VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
     hash_table = vtlb_thash(hcb->pta, va, vrr.rrval, &tag);
     len = 0;
-    cch = hash_table;
-    do {
+    for (cch = hash_table->next; cch; cch = cch->next) {
         if (INVALID_TLB(cch)) {
             cch->page_flags = pte;
-            cch->itir = itir;
+            cch->itir = itir | ITIR_VTLB;
             cch->etag = tag;
             return;
         }
         ++len;
         tail = cch;
-        cch = cch->next;
-    } while(cch);
+    }
     if (len >= MAX_CCN_DEPTH) {
         thash_recycle_cch(hcb, hash_table, tail);
         cch = cch_alloc(hcb);
@@ -440,7 +435,7 @@ static void vtlb_insert(VCPU *v, u64 pte
         cch = __alloc_chain(hcb);
     }
     cch->page_flags = pte;
-    cch->itir = itir;
+    cch->itir = itir | ITIR_VTLB;
     cch->etag = tag;
     cch->next = hash_table->next;
     wmb();
@@ -587,24 +582,10 @@ void thash_purge_all(VCPU *v)
 {
     int num;
     thash_data_t *head;
-    thash_cb_t *vtlb,*vhpt;
-    vtlb = &v->arch.vtlb;
-    vhpt = &v->arch.vhpt;
+    thash_cb_t *vhpt = &v->arch.vhpt;
 
     for (num = 0; num < 8; num++)
         VMX(v, psbits[num]) = 0;
-
-    head = vtlb->hash;
-    num = (vtlb->hash_sz/sizeof(thash_data_t));
-    do{
-        head->page_flags = 0;
-        head->etag = 1UL<<63;
-        head->itir = 0;
-        head->next = 0;
-        head++;
-        num--;
-    } while(num);
-    cch_mem_init(vtlb);
 
     head = vhpt->hash;
     num = (vhpt->hash_sz/sizeof(thash_data_t));
@@ -633,7 +614,7 @@ thash_data_t *vtlb_lookup(VCPU *v, u64 v
     thash_data_t *cch;
     u64 psbits, ps, tag;
     ia64_rr vrr;
-    thash_cb_t *hcb = &v->arch.vtlb;
+    thash_cb_t *hcb = &v->arch.vhpt;
 
     cch = __vtr_lookup(v, va, is_data);
     if (cch)
@@ -648,11 +629,10 @@ thash_data_t *vtlb_lookup(VCPU *v, u64 v
         psbits &= ~(1UL << ps);
         vrr.ps = ps;
         cch = vtlb_thash(hcb->pta, va, vrr.rrval, &tag);
-        do {
-            if (cch->etag == tag && cch->ps == ps)
+        for (cch = cch->next; cch != NULL; cch = cch->next) {
+            if (THASH_MATCH_VTLB(cch, tag, ps))
                 return cch;
-            cch = cch->next;
-        } while(cch);
+        }
     }
     return NULL;
 }
diff -r daf39fc8038a xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h	Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/include/asm-ia64/domain.h	Thu Feb 28 11:57:11 2008 +0900
@@ -273,7 +273,6 @@ struct arch_vcpu {
 
     struct thread_struct _thread;     // this must be last
 
-    thash_cb_t vtlb;
     thash_cb_t vhpt;
     char irq_new_pending;
     char irq_new_condition;    // vpsr.i/vtpr change, check for pending VHPI
diff -r daf39fc8038a xen/include/asm-ia64/vmmu.h
--- a/xen/include/asm-ia64/vmmu.h	Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/include/asm-ia64/vmmu.h	Thu Feb 28 17:08:09 2008 +0900
@@ -24,9 +24,7 @@
 #define XEN_TLBthash_H
 
 #define MAX_CCN_DEPTH   (15)       // collision chain depth
-#define DEFAULT_VTLB_SZ (14)       // 16K hash + 16K c-chain for VTLB
 #define DEFAULT_VHPT_SZ (23)       // 8M hash + 8M c-chain for VHPT
 
-#define VTLB(v,_x) (v->arch.vtlb._x)
 #define VHPT(v,_x) (v->arch.vhpt._x)
 #ifndef __ASSEMBLY__
@@ -51,6 +49,10 @@ enum {
 #define VTLB_PTE_P      (1UL<<VTLB_PTE_P_BIT)
 
 #define ITIR_RV_MASK    (((1UL<<32)-1)<<32 | 0x3)
+#define ITIR_PS_MASK    (((1<<6)-1)<<2)
+#define ITIR_VTLB_BIT   1          // NB. reserved field
+#define ITIR_VTLB       (1UL<<ITIR_VTLB_BIT)
+
 #define PAGE_FLAGS_RV_MASK (0x2 | (0x3UL<<50)|(((1UL<<11)-1)<<53))
 #define PAGE_FLAGS_AR_PL_MASK ((0x7UL<<9)|(0x3UL<<7))
 
@@ -103,21 +105,12 @@ typedef struct thash_data {
     };
 } thash_data_t;
 
-#define INVALIDATE_VHPT_HEADER(hdata)   \
-{   ((hdata)->page_flags)=0;            \
-    ((hdata)->itir)=PAGE_SHIFT<<2;      \
-    ((hdata)->etag)=1UL<<63;            \
-    ((hdata)->next)=0;}
-
-#define INVALIDATE_TLB_HEADER(hash)     INVALIDATE_VHPT_HEADER(hash)
-
-#define INVALIDATE_HASH_HEADER(hcb,hash)    INVALIDATE_VHPT_HEADER(hash)
-
 #define INVALID_VHPT(hdata)     ((hdata)->ti)
 #define INVALID_TLB(hdata)      ((hdata)->ti)
-#define INVALID_TR(hdata)       (!(hdata)->p)
-#define INVALID_ENTRY(hcb, hdata)       INVALID_VHPT(hdata)
-
+
+#define THASH_MATCH_VTLB(hdata, tag, ps)                        \
+    ((hdata)->etag == (tag) &&                                  \
+     (((hdata)->itir & (ITIR_PS_MASK|ITIR_VTLB)) == (((ps)<<2)|ITIR_VTLB)))
 
 typedef struct thash_cb {
     /* THASH base information */
diff -r daf39fc8038a xen/include/asm-ia64/vmx_vcpu.h
--- a/xen/include/asm-ia64/vmx_vcpu.h	Wed Feb 27 13:08:59 2008 -0700
+++ b/xen/include/asm-ia64/vmx_vcpu.h	Thu Feb 28 11:57:11 2008 +0900
@@ -380,15 +380,6 @@ static inline unsigned long vrrtomrr(VCP
 #endif
 }
 
-static inline thash_cb_t *vmx_vcpu_get_vtlb(VCPU * vcpu)
-{
-    return &vcpu->arch.vtlb;
-}
-
-static inline thash_cb_t *vcpu_get_vhpt(VCPU * vcpu)
-{
-    return &vcpu->arch.vhpt;
-}
 
 
 /**************************************************************************

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel