[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] scaling problem with writable pagetables
We started to test a Linux dom0 up to 16-way (4 sockets, dual core, with HT) and we began to see some serious scaling issues, compared to scaling baremetal Linux to 16-way. We took some profiles and saw that functions in xen/arch/x86/mm.c were using disproportionately more CPU time as we scaled up the number of CPUs. Taking a quick look at those functions (for example, update_va_mapping and do_mmu_update) it became kind of obvious that the locking probably does not scale. It appears we lock domain-wide in many of these functions. This, IMO, causes a serious problem when running an SMP domain which happens to page fault a lot. So, I got to thinking, just how much protection do we really need in these functions? The OS should already provide quite a bit of protection to page table writes. Is Xen imposing an even more deliberate and possibly unnecessary protection here? So I made some changes to when we lock/unlock in most of these functions in mm.c (patch attached). Warning: I am pretty much making a shot in the dark here. I do not know this code nearly well enough to say this is the right thing to do. 
However, I can say without a doubt the changes make a significant change in performance: benchmark throughput increase with lock reduction: SDET 19%, reaim_shared 65%, reaim_fserver 16%. Below are per-function ratios of CPU time rev8830/rev8830-lock-reduction (derived from oprofile diffs) SDET: 9.84/1 restore_all_guest 1.45/1 mod_l1_entry 2.59/1 do_softirq 1.63/1 test_guest_events 1.09/1 syscall_enter 1.35/1 propagate_page_fault 1.18/1 process_guest_except 1.13/1 timer_softirq_action 1.04/1 alloc_page_type 1.05/1 revalidate_l1 1.08/1 do_set_segment_base 1.10/1 get_s_time 1.19/1 __context_switch 1.09/1 switch_to_kernel 1.11/1 FLT4 1.62/1 xen_l3_entry_update 1.27/1 xen_invlpg_mask reaim_shared: 1.43/1 do_update_va_mapping 1.44/1 do_page_fault 1.47/1 do_mmu_update 6.75/1 restore_all_guest 1.43/1 do_mmuext_op 1.37/1 sedf_do_schedule 1.20/1 mod_l1_entry 2.46/1 do_softirq 1.27/1 t_timer_fn 1.34/1 do_set_segment_base 1.20/1 timer_softirq_action 1.24/1 process_guest_except 1.12/1 timer_interrupt 1.14/1 evtchn_send reaim_fserver: 1.16/1 do_update_va_mapping 1.13/1 do_page_fault 8.41/1 restore_all_guest 1.17/1 do_mmu_update 1.56/1 mod_l1_entry 2.48/1 do_softirq 1.02/1 do_mmuext_op 1.14/1 sedf_do_schedule 1.12/1 t_timer_fn 1.23/1 do_set_segment_base 1.11/1 device_not_available 1.11/1 timer_softirq_action 1.13/1 process_guest_except 1.20/1 timer_interrupt 1.15/1 copy_from_user 1.11/1 propagate_page_fault Any comments greatly appreciated. 
-Andrew <signed-off-by: habanero@xxxxxxxxxx> diff -Naurp xen-unstable.hg-8830/xen/arch/x86/mm.c xen-unstable.hg-8830-lockfix/xen/arch/x86/mm.c --- xen-unstable.hg-8830/xen/arch/x86/mm.c 2006-02-15 16:30:31.000000000 -0600 +++ xen-unstable.hg-8830-lockfix/xen/arch/x86/mm.c 2006-02-15 16:32:48.000000000 -0600 @@ -1729,6 +1729,8 @@ int do_mmuext_op( cleanup_writable_pagetable(d); + UNLOCK_BIGLOCK(d); + if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { count &= ~MMU_UPDATE_PREEMPTED; @@ -1957,7 +1959,6 @@ int do_mmuext_op( if ( unlikely(pdone != NULL) ) __put_user(done + i, pdone); - UNLOCK_BIGLOCK(d); return rc; } @@ -1982,6 +1983,8 @@ int do_mmu_update( cleanup_writable_pagetable(d); + UNLOCK_BIGLOCK(d); + if ( unlikely(shadow_mode_enabled(d)) ) check_pagetable(v, "pre-mmu"); /* debug */ @@ -2206,7 +2209,6 @@ int do_mmu_update( if ( unlikely(shadow_mode_enabled(d)) ) check_pagetable(v, "post-mmu"); /* debug */ - UNLOCK_BIGLOCK(d); return rc; } @@ -2503,6 +2505,8 @@ int do_update_va_mapping(unsigned long v cleanup_writable_pagetable(d); + UNLOCK_BIGLOCK(d); + if ( unlikely(shadow_mode_enabled(d)) ) check_pagetable(v, "pre-va"); /* debug */ @@ -2574,8 +2578,6 @@ int do_update_va_mapping(unsigned long v process_deferred_ops(cpu); - UNLOCK_BIGLOCK(d); - return rc; } @@ -2675,13 +2677,9 @@ long do_set_gdt(unsigned long *frame_lis if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) ) return -EFAULT; - LOCK_BIGLOCK(current->domain); - if ( (ret = set_gdt(current, frames, entries)) == 0 ) local_flush_tlb(); - UNLOCK_BIGLOCK(current->domain); - return ret; } @@ -2700,21 +2698,18 @@ long do_update_descriptor(u64 pa, u64 de *(u64 *)&d = desc; - LOCK_BIGLOCK(dom); if ( !VALID_MFN(mfn = gmfn_to_mfn(dom, gmfn)) || (((unsigned int)pa % sizeof(struct desc_struct)) != 0) || !mfn_valid(mfn) || !check_descriptor(&d) ) { - UNLOCK_BIGLOCK(dom); return -EINVAL; } page = mfn_to_page(mfn); if ( unlikely(!get_page(page, dom)) ) { - UNLOCK_BIGLOCK(dom); return -EINVAL; } 
@@ -2760,8 +2755,6 @@ long do_update_descriptor(u64 pa, u64 de out: put_page(page); - UNLOCK_BIGLOCK(dom); - return ret; } @@ -2793,7 +2786,6 @@ long arch_memory_op(int op, void *arg) return -ESRCH; } - LOCK_BIGLOCK(d); if ( d->arch.first_reserved_pfn == 0 ) { d->arch.first_reserved_pfn = pfn = d->max_pages; @@ -2803,7 +2795,6 @@ long arch_memory_op(int op, void *arg) guest_physmap_add_page( d, pfn + 1 + i, gnttab_shared_mfn(d, d->grant_table, i)); } - UNLOCK_BIGLOCK(d); xrpa.first_gpfn = d->arch.first_reserved_pfn; xrpa.nr_gpfns = 32; _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |