[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v3 1/1] xen: move TLB-flush filtering out into populate_physmap during vm creation
This patch implements part of the TODO left in commit id a902c12ee45fc9389eb8fe54eeddaf267a555c58 by moving TLB-flush filtering out into populate_physmap. Because of the TLB flush in alloc_heap_pages, it is very slow to create a guest with more than 100GB of memory on a host with 100+ CPUs. This patch introduces a "MEMF_no_tlbflush" bit in memflags to indicate whether the TLB flush should be done in alloc_heap_pages or in its caller, populate_physmap. Once this bit is set in memflags, alloc_heap_pages will skip the TLB flush. Using this bit after the VM has been created might lead to a security issue: it would make pages accessible to guest B while guest A may still have a cached mapping to them. Therefore, this patch also introduces an "already_scheduled" field in struct domain to indicate whether the domain has ever been scheduled by the hypervisor. MEMF_no_tlbflush can be set only during the VM creation phase, while already_scheduled is still 0, i.e. before the domain gets scheduled for the first time. TODO: ballooning a very large amount of memory cannot benefit from this patch and might still be slow. Signed-off-by: Dongli Zhang <dongli.zhang@xxxxxxxxxx> --- Changed since v2: * Limit this optimization to domain creation time. 
--- xen/common/domain.c | 2 ++ xen/common/memory.c | 33 +++++++++++++++++++++++++++++++++ xen/common/page_alloc.c | 3 ++- xen/common/schedule.c | 5 +++++ xen/include/xen/mm.h | 2 ++ xen/include/xen/sched.h | 3 +++ 6 files changed, 47 insertions(+), 1 deletion(-) diff --git a/xen/common/domain.c b/xen/common/domain.c index a8804e4..611a471 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -303,6 +303,8 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags, if ( !zalloc_cpumask_var(&d->domain_dirty_cpumask) ) goto fail; + d->already_scheduled = 0; + if ( domcr_flags & DOMCRF_hvm ) d->guest_type = guest_type_hvm; else if ( domcr_flags & DOMCRF_pvh ) diff --git a/xen/common/memory.c b/xen/common/memory.c index f34dd56..3641469 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -141,6 +141,8 @@ static void populate_physmap(struct memop_args *a) unsigned int i, j; xen_pfn_t gpfn, mfn; struct domain *d = a->domain, *curr_d = current->domain; + bool_t need_tlbflush = 0; + uint32_t tlbflush_timestamp = 0; if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, a->nr_extents-1) ) @@ -150,6 +152,12 @@ static void populate_physmap(struct memop_args *a) max_order(curr_d)) ) return; + /* MEMF_no_tlbflush can be set only during vm creation phase when + * already_scheduled is still 0 before this domain gets scheduled for + * the first time. 
*/ + if ( d->already_scheduled == 0 ) + a->memflags |= MEMF_no_tlbflush; + for ( i = a->nr_done; i < a->nr_extents; i++ ) { if ( i != a->nr_done && hypercall_preempt_check() ) @@ -214,6 +222,21 @@ static void populate_physmap(struct memop_args *a) goto out; } + if ( d->already_scheduled == 0 ) + { + for ( j = 0; j < (1U << a->extent_order); j++ ) + { + if ( page[j].u.free.need_tlbflush && + (page[j].tlbflush_timestamp <= tlbflush_current_time()) && + (!need_tlbflush || + (page[j].tlbflush_timestamp > tlbflush_timestamp)) ) + { + need_tlbflush = 1; + tlbflush_timestamp = page[j].tlbflush_timestamp; + } + } + } + mfn = page_to_mfn(page); } @@ -232,6 +255,16 @@ static void populate_physmap(struct memop_args *a) } out: + if ( need_tlbflush ) + { + cpumask_t mask = cpu_online_map; + tlbflush_filter(mask, tlbflush_timestamp); + if ( !cpumask_empty(&mask) ) + { + perfc_incr(need_flush_tlb_flush); + flush_tlb_mask(&mask); + } + } a->nr_done = i; } diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 18ff6cf..e0283fc 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -827,7 +827,8 @@ static struct page_info *alloc_heap_pages( BUG_ON(pg[i].count_info != PGC_state_free); pg[i].count_info = PGC_state_inuse; - if ( pg[i].u.free.need_tlbflush && + if ( !(memflags & MEMF_no_tlbflush) && + pg[i].u.free.need_tlbflush && (pg[i].tlbflush_timestamp <= tlbflush_current_time()) && (!need_tlbflush || (pg[i].tlbflush_timestamp > tlbflush_timestamp)) ) diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 32a300f..593541a 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -1376,6 +1376,11 @@ static void schedule(void) next = next_slice.task; + /* Set already_scheduled to 1 when this domain gets scheduled for the + * first time */ + if ( next->domain->already_scheduled == 0 ) + next->domain->already_scheduled = 1; + sd->curr = next; if ( next_slice.time >= 0 ) /* -ve means no limit */ diff --git a/xen/include/xen/mm.h 
b/xen/include/xen/mm.h index 58bc0b8..880ca88 100644 --- a/xen/include/xen/mm.h +++ b/xen/include/xen/mm.h @@ -221,6 +221,8 @@ struct npfec { #define MEMF_exact_node (1U<<_MEMF_exact_node) #define _MEMF_no_owner 5 #define MEMF_no_owner (1U<<_MEMF_no_owner) +#define _MEMF_no_tlbflush 6 +#define MEMF_no_tlbflush (1U<<_MEMF_no_tlbflush) #define _MEMF_node 8 #define MEMF_node_mask ((1U << (8 * sizeof(nodeid_t))) - 1) #define MEMF_node(n) ((((n) + 1) & MEMF_node_mask) << _MEMF_node) diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 2f9c15f..cbd8329 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -474,6 +474,9 @@ struct domain unsigned int guest_request_enabled : 1; unsigned int guest_request_sync : 1; } monitor; + + /* set to 1 the first time this domain gets scheduled. */ + bool_t already_scheduled; }; /* Protect updates/reads (resp.) of domain_list and domain_hash. */ -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |