[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] Reduce overhead in find_domain_by_id() [2/2]
This is a set of patches to improve performance of find_domain_by_id(). find_domain_by_id shows up high in profiles for network I/O intensive workloads. Most of the cost for this function comes from 3 main functions (of aproximate equal costs): 1)read_lock(), 2)read_unlock() and 3)get_domain(). These patches replace the lock used for accessing domain_list and domain_hash with a lock free RCU scheme. Experiments confirm that the cost of find_domain_by_id() is in fact reduced by 2/3. The patches apply cleanly to changeset 12732. Renato Patch: 2/2 - replace domlist_lock operations by RCU operations ======================================================= Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx> # HG changeset patch # User jsantos@xxxxxxxxxx # Node ID 8d07c0a68759ab97cb7917bdf65144f20a5568ca # Parent 555277cafc3576a3a96bd7ec345c3871b18aa867 Modify accesses to domain_list and domain_hash to use RCU functions instead of rwlock diff -r 555277cafc35 -r 8d07c0a68759 xen/acm/acm_chinesewall_hooks.c --- a/xen/acm/acm_chinesewall_hooks.c Tue Dec 5 14:47:26 2006 -0800 +++ b/xen/acm/acm_chinesewall_hooks.c Tue Dec 5 15:31:44 2006 -0800 @@ -196,7 +196,7 @@ chwall_init_state(struct acm_chwall_poli ssidref_t chwall_ssidref; struct domain **pd; - write_lock(&domlist_lock); + spin_lock(&domlist_lock); /* go through all domains and adjust policy as if this domain was started now */ pd = &domain_list; for (pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list) @@ -247,7 +247,7 @@ chwall_init_state(struct acm_chwall_poli } } out: - write_unlock(&domlist_lock); + spin_unlock(&domlist_lock); return violation; /* returning "violation != 0" means that the currently running set of domains would * not be possible if the new policy had been enforced before starting them; for chinese diff -r 555277cafc35 -r 8d07c0a68759 xen/acm/acm_simple_type_enforcement_hooks.c --- a/xen/acm/acm_simple_type_enforcement_hooks.c Tue Dec 5 14:47:26 2006 -0800 +++ 
b/xen/acm/acm_simple_type_enforcement_hooks.c Tue Dec 5 15:31:44 2006 -0800 @@ -175,36 +175,37 @@ ste_init_state(struct acm_ste_policy_buf int violation = 1; struct ste_ssid *ste_ssid, *ste_rssid; ssidref_t ste_ssidref, ste_rssidref; - struct domain **pd, *rdom; + struct domain *d, *rdom; domid_t rdomid; grant_entry_t sha_copy; int port, i; - read_lock(&domlist_lock); /* go by domain? or directly by global? event/grant list */ + rcu_read_lock(); /* go by domain? or directly by global? event/grant list */ /* go through all domains and adjust policy as if this domain was started now */ - pd = &domain_list; - for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) { + for ( d = rcu_dereference(domain_list); + d != NULL; + d = rcu_dereference(d->next_in_list) ) { ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, - (struct acm_ssid_domain *)(*pd)->ssid); + (struct acm_ssid_domain *)d->ssid); ste_ssidref = ste_ssid->ste_ssidref; traceprintk("%s: validating policy for eventch domain %x (ste-Ref=%x).\n", - __func__, (*pd)->domain_id, ste_ssidref); + __func__, d->domain_id, ste_ssidref); /* a) check for event channel conflicts */ for (port=0; port < NR_EVTCHN_BUCKETS; port++) { - spin_lock(&(*pd)->evtchn_lock); - if ((*pd)->evtchn[port] == NULL) { - spin_unlock(&(*pd)->evtchn_lock); + spin_lock(&d->evtchn_lock); + if (d->evtchn[port] == NULL) { + spin_unlock(&d->evtchn_lock); continue; } - if ((*pd)->evtchn[port]->state == ECS_INTERDOMAIN) { - rdom = (*pd)->evtchn[port]->u.interdomain.remote_dom; + if (d->evtchn[port]->state == ECS_INTERDOMAIN) { + rdom = d->evtchn[port]->u.interdomain.remote_dom; rdomid = rdom->domain_id; /* rdom now has remote domain */ ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, (struct acm_ssid_domain *)(rdom->ssid)); ste_rssidref = ste_rssid->ste_ssidref; - } else if ((*pd)->evtchn[port]->state == ECS_UNBOUND) { - rdomid = (*pd)->evtchn[port]->u.unbound.remote_domid; + } else if (d->evtchn[port]->state == ECS_UNBOUND) { + 
rdomid = d->evtchn[port]->u.unbound.remote_domid; if ((rdom = find_domain_by_id(rdomid)) == NULL) { printk("%s: Error finding domain to id %x!\n", __func__, rdomid); goto out; @@ -215,34 +216,34 @@ ste_init_state(struct acm_ste_policy_buf ste_rssidref = ste_rssid->ste_ssidref; put_domain(rdom); } else { - spin_unlock(&(*pd)->evtchn_lock); + spin_unlock(&d->evtchn_lock); continue; /* port unused */ } - spin_unlock(&(*pd)->evtchn_lock); + spin_unlock(&d->evtchn_lock); /* rdom now has remote domain */ ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, (struct acm_ssid_domain *)(rdom->ssid)); ste_rssidref = ste_rssid->ste_ssidref; traceprintk("%s: eventch: domain %x (ssidref %x) --> domain %x (rssidref %x) used (port %x).\n", - __func__, (*pd)->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port); + __func__, d->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port); /* check whether on subj->ssid, obj->ssid share a common type*/ if (!have_common_type(ste_ssidref, ste_rssidref)) { printkd("%s: Policy violation in event channel domain %x -> domain %x.\n", - __func__, (*pd)->domain_id, rdomid); + __func__, d->domain_id, rdomid); goto out; } } /* b) check for grant table conflicts on shared pages */ - if ((*pd)->grant_table->shared == NULL) { - printkd("%s: Grant ... sharing for domain %x not setup!\n", __func__, (*pd)->domain_id); + if (d->grant_table->shared == NULL) { + printkd("%s: Grant ... 
sharing for domain %x not setup!\n", __func__, d->domain_id); continue; } for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) { - sha_copy = (*pd)->grant_table->shared[i]; + sha_copy = d->grant_table->shared[i]; if ( sha_copy.flags ) { printkd("%s: grant dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) frame:(%lx)\n", - __func__, (*pd)->domain_id, i, sha_copy.flags, sha_copy.domid, + __func__, d->domain_id, i, sha_copy.flags, sha_copy.domid, (unsigned long)sha_copy.frame); rdomid = sha_copy.domid; if ((rdom = find_domain_by_id(rdomid)) == NULL) { @@ -256,7 +257,7 @@ ste_init_state(struct acm_ste_policy_buf put_domain(rdom); if (!have_common_type(ste_ssidref, ste_rssidref)) { printkd("%s: Policy violation in grant table sharing domain %x -> domain %x.\n", - __func__, (*pd)->domain_id, rdomid); + __func__, d->domain_id, rdomid); goto out; } } @@ -264,7 +265,7 @@ ste_init_state(struct acm_ste_policy_buf } violation = 0; out: - read_unlock(&domlist_lock); + rcu_read_unlock(); return violation; /* returning "violation != 0" means that existing sharing between domains would not * have been allowed if the new policy had been enforced before the sharing; for ste, @@ -280,7 +281,7 @@ ste_set_policy(u8 *buf, u32 buf_size) struct acm_ste_policy_buffer *ste_buf = (struct acm_ste_policy_buffer *)buf; void *ssidrefsbuf; struct ste_ssid *ste_ssid; - struct domain **pd; + struct domain *d; int i; if (buf_size < sizeof(struct acm_ste_policy_buffer)) @@ -325,15 +326,16 @@ ste_set_policy(u8 *buf, u32 buf_size) ste_bin_pol.ssidrefs = (domaintype_t *)ssidrefsbuf; /* clear all ste caches */ - read_lock(&domlist_lock); - pd = &domain_list; - for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) { + rcu_read_lock(); + for ( d = rcu_dereference(domain_list); + d != NULL; + d = rcu_dereference(d->next_in_list) ) { ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, - (struct acm_ssid_domain *)(*pd)->ssid); + (struct acm_ssid_domain *)(d)->ssid); for (i=0; i<ACM_TE_CACHE_SIZE; i++) 
ste_ssid->ste_cache[i].valid = ACM_STE_free; } - read_unlock(&domlist_lock); + rcu_read_unlock(); return ACM_OK; error_free: @@ -435,14 +437,15 @@ clean_id_from_cache(domid_t id) { struct ste_ssid *ste_ssid; int i; - struct domain **pd; + struct domain *d; struct acm_ssid_domain *ssid; printkd("deleting cache for dom %x.\n", id); - read_lock(&domlist_lock); /* look through caches of all domains */ - pd = &domain_list; - for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) { - ssid = (struct acm_ssid_domain *)((*pd)->ssid); + rcu_read_lock(); /* look through caches of all domains */ + for ( d = rcu_dereference(domain_list); + d != NULL; + d = rcu_dereference(d->next_in_list) ) { + ssid = (struct acm_ssid_domain *)(d->ssid); if (ssid == NULL) continue; /* hanging domain structure, no ssid any more ... */ @@ -458,7 +461,7 @@ clean_id_from_cache(domid_t id) ste_ssid->ste_cache[i].valid = ACM_STE_free; } out: - read_unlock(&domlist_lock); + rcu_read_unlock(); } /*************************** diff -r 555277cafc35 -r 8d07c0a68759 xen/arch/ia64/xen/flushtlb.c --- a/xen/arch/ia64/xen/flushtlb.c Tue Dec 5 14:47:26 2006 -0800 +++ b/xen/arch/ia64/xen/flushtlb.c Tue Dec 5 15:31:44 2006 -0800 @@ -78,7 +78,7 @@ new_tlbflush_clock_period(void) struct domain* d; struct vcpu* v; /* flush all vhpt of vcpu of all existing domain. 
*/ - read_lock(&domlist_lock); + rcu_read_lock(); for_each_domain(d) { for_each_vcpu(d, v) { vcpu_purge_tr_entry(&PSCBX(v,dtlb)); @@ -92,7 +92,7 @@ new_tlbflush_clock_period(void) vcpu_vhpt_flush(v); } } - read_unlock(&domlist_lock); + rcu_read_unlock(); /* unlock has release semantics */ /* flush all vhpt of physical cpu and mTLB */ diff -r 555277cafc35 -r 8d07c0a68759 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Tue Dec 5 14:47:26 2006 -0800 +++ b/xen/arch/x86/time.c Tue Dec 5 15:31:44 2006 -0800 @@ -720,10 +720,10 @@ void do_settime(unsigned long secs, unsi wc_nsec = _wc_nsec = (u32)y; spin_unlock(&wc_lock); - read_lock(&domlist_lock); + rcu_read_lock(); for_each_domain ( d ) update_domain_wallclock_time(d); - read_unlock(&domlist_lock); + rcu_read_unlock(); } static void local_time_calibration(void *unused) diff -r 555277cafc35 -r 8d07c0a68759 xen/common/domain.c --- a/xen/common/domain.c Tue Dec 5 14:47:26 2006 -0800 +++ b/xen/common/domain.c Tue Dec 5 15:31:44 2006 -0800 @@ -23,14 +23,19 @@ #include <xen/shutdown.h> #include <xen/percpu.h> #include <xen/multicall.h> +#include <xen/rcupdate.h> #include <asm/debugger.h> #include <public/sched.h> #include <public/vcpu.h> -/* Both these structures are protected by the domlist_lock. */ -DEFINE_RWLOCK(domlist_lock); +/* domain_list and domain_hash are protected by the same RCU functions + * they are not modified atomically and may be inconsistent with each other + * thus, only one of these can be used on each RCU reader-side + * critical section */ struct domain *domain_hash[DOMAIN_HASH_SIZE]; struct domain *domain_list; +/* spin lock for domain_list and domain_hash (RCU) */ +DEFINE_SPINLOCK(domlist_lock); struct domain *dom0; @@ -170,16 +175,20 @@ struct domain *domain_create(domid_t dom if ( !is_idle_domain(d) ) { - write_lock(&domlist_lock); + spin_lock(&domlist_lock); pd = &domain_list; /* NB. domain_list maintained in order of domid. 
*/ for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) if ( (*pd)->domain_id > d->domain_id ) break; d->next_in_list = *pd; - *pd = d; d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)]; - domain_hash[DOMAIN_HASH(domid)] = d; - write_unlock(&domlist_lock); + /* Two rcu assignments are not atomic + * Readers may see inconsistent domlist and hash table + * That is OK as long as each RCU reader-side critical section uses + * only one or them */ + rcu_assign_pointer(*pd, d); + rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d); + spin_unlock(&domlist_lock); } return d; @@ -203,8 +212,8 @@ struct domain *find_domain_by_id(domid_t { struct domain *d; - read_lock(&domlist_lock); - d = domain_hash[DOMAIN_HASH(dom)]; + rcu_read_lock(); + d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]); while ( d != NULL ) { if ( d->domain_id == dom ) @@ -213,9 +222,9 @@ struct domain *find_domain_by_id(domid_t d = NULL; break; } - d = d->next_in_hashbucket; - } - read_unlock(&domlist_lock); + d = rcu_dereference(d->next_in_hashbucket); + } + rcu_read_unlock(); return d; } @@ -306,6 +315,23 @@ void domain_pause_for_debugger(void) send_guest_global_virq(dom0, VIRQ_DEBUGGER); } +/* Complete domain destroy after RCU readers are not holding + old references */ +static void complete_domain_destroy(struct rcu_head *head) +{ + struct domain *d = container_of(head, struct domain, rcu); + + rangeset_domain_destroy(d); + + evtchn_destroy(d); + grant_table_destroy(d); + + arch_domain_destroy(d); + + free_domain(d); + + send_guest_global_virq(dom0, VIRQ_DOM_EXC); +} /* Release resources belonging to task @p. */ void domain_destroy(struct domain *d) @@ -323,27 +349,19 @@ void domain_destroy(struct domain *d) return; /* Delete from task list and task hashtable. 
*/ - write_lock(&domlist_lock); + spin_lock(&domlist_lock); pd = &domain_list; while ( *pd != d ) pd = &(*pd)->next_in_list; - *pd = d->next_in_list; + rcu_assign_pointer(*pd, d->next_in_list); pd = &domain_hash[DOMAIN_HASH(d->domain_id)]; while ( *pd != d ) pd = &(*pd)->next_in_hashbucket; - *pd = d->next_in_hashbucket; - write_unlock(&domlist_lock); - - rangeset_domain_destroy(d); - - evtchn_destroy(d); - grant_table_destroy(d); - - arch_domain_destroy(d); - - free_domain(d); - - send_guest_global_virq(dom0, VIRQ_DOM_EXC); + rcu_assign_pointer(*pd, d->next_in_hashbucket); + spin_unlock(&domlist_lock); + + /* schedule RCU asynchronous completion of domain destroy */ + call_rcu(&d->rcu, complete_domain_destroy); } void vcpu_pause(struct vcpu *v) diff -r 555277cafc35 -r 8d07c0a68759 xen/common/domctl.c --- a/xen/common/domctl.c Tue Dec 5 14:47:26 2006 -0800 +++ b/xen/common/domctl.c Tue Dec 5 15:31:44 2006 -0800 @@ -17,6 +17,7 @@ #include <xen/trace.h> #include <xen/console.h> #include <xen/iocap.h> +#include <xen/rcupdate.h> #include <xen/guest_access.h> #include <asm/current.h> #include <public/domctl.h> @@ -139,12 +140,12 @@ static unsigned int default_vcpu0_locati cpumask_t cpu_exclude_map; /* Do an initial CPU placement. Pick the least-populated CPU. */ - read_lock(&domlist_lock); + rcu_read_lock(); for_each_domain ( d ) for_each_vcpu ( d, v ) if ( !test_bit(_VCPUF_down, &v->vcpu_flags) ) cnt[v->processor]++; - read_unlock(&domlist_lock); + rcu_read_unlock(); /* * If we're on a HT system, we only auto-allocate to a non-primary HT. 
We @@ -417,7 +418,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc if ( dom == DOMID_SELF ) dom = current->domain->domain_id; - read_lock(&domlist_lock); + rcu_read_lock(); for_each_domain ( d ) { @@ -427,12 +428,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc if ( (d == NULL) || !get_domain(d) ) { - read_unlock(&domlist_lock); + rcu_read_unlock(); ret = -ESRCH; break; } - read_unlock(&domlist_lock); + rcu_read_unlock(); getdomaininfo(d, &op->u.getdomaininfo); diff -r 555277cafc35 -r 8d07c0a68759 xen/common/keyhandler.c --- a/xen/common/keyhandler.c Tue Dec 5 14:47:26 2006 -0800 +++ b/xen/common/keyhandler.c Tue Dec 5 15:31:44 2006 -0800 @@ -139,7 +139,7 @@ static void dump_domains(unsigned char k printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key, (u32)(now>>32), (u32)now); - read_lock(&domlist_lock); + rcu_read_lock(); for_each_domain ( d ) { @@ -190,7 +190,7 @@ static void dump_domains(unsigned char k } } - read_unlock(&domlist_lock); + rcu_read_unlock(); } static cpumask_t read_clocks_cpumask = CPU_MASK_NONE; diff -r 555277cafc35 -r 8d07c0a68759 xen/common/sysctl.c --- a/xen/common/sysctl.c Tue Dec 5 14:47:26 2006 -0800 +++ b/xen/common/sysctl.c Tue Dec 5 15:31:44 2006 -0800 @@ -80,7 +80,7 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc struct xen_domctl_getdomaininfo info; u32 num_domains = 0; - read_lock(&domlist_lock); + rcu_read_lock(); for_each_domain ( d ) { @@ -108,7 +108,7 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc num_domains++; } - read_unlock(&domlist_lock); + rcu_read_unlock(); if ( ret != 0 ) break; diff -r 555277cafc35 -r 8d07c0a68759 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Tue Dec 5 14:47:26 2006 -0800 +++ b/xen/include/xen/sched.h Tue Dec 5 15:31:44 2006 -0800 @@ -15,10 +15,11 @@ #include <xen/rangeset.h> #include <asm/domain.h> #include <xen/xenoprof.h> +#include <xen/rcupdate.h> #include <xen/irq.h> extern unsigned long volatile jiffies; -extern rwlock_t domlist_lock; +extern spinlock_t domlist_lock; /* A global pointer 
to the initial domain (DOM0). */ extern struct domain *dom0; @@ -172,6 +173,8 @@ struct domain /* OProfile support. */ struct xenoprof *xenoprof; int32_t time_offset_seconds; + + struct rcu_head rcu; }; struct domain_setup_info @@ -344,16 +347,19 @@ unsigned long hypercall_create_continuat local_events_need_delivery() \ )) -/* This domain_hash and domain_list are protected by the domlist_lock. */ +/* domain_list and domain_hash are protected by the same RCU functions + * they are not modified atomically and may be inconsistent with each other + * thus, only one of these can be used on each RCU reader-side + * critical section */ #define DOMAIN_HASH_SIZE 256 #define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1)) extern struct domain *domain_hash[DOMAIN_HASH_SIZE]; extern struct domain *domain_list; #define for_each_domain(_d) \ - for ( (_d) = domain_list; \ + for ( (_d) = rcu_dereference(domain_list); \ (_d) != NULL; \ - (_d) = (_d)->next_in_list ) + (_d) = rcu_dereference((_d)->next_in_list )) #define for_each_vcpu(_d,_v) \ for ( (_v) = (_d)->vcpu[0]; \ 
rcu_domain_2.patch _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |