[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/6] xenbus: implement the xenwatch multithreading framework



> -----Original Message-----
> From: Dongli Zhang [mailto:dongli.zhang@xxxxxxxxxx]
> Sent: 14 September 2018 08:34
> To: xen-devel@xxxxxxxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx
> Cc: boris.ostrovsky@xxxxxxxxxx; jgross@xxxxxxxx; Paul Durrant
> <Paul.Durrant@xxxxxxxxxx>; Wei Liu <wei.liu2@xxxxxxxxxx>;
> konrad.wilk@xxxxxxxxxx; Roger Pau Monne <roger.pau@xxxxxxxxxx>;
> srinivas.eeda@xxxxxxxxxx
> Subject: [PATCH 2/6] xenbus: implement the xenwatch multithreading
> framework
> 
> This is the 2nd patch of a (6-patch) patch set.
> 
> This patch implements the xenwatch multithreading framework to create or
> destroy the per-domU xenwatch thread. The xenwatch thread is created or
> destroyed during xenbus device probing or removing (that is,
> xenbus_dev_probe() or xenbus_dev_remove()) if the corresponding pv driver
> has xenwatch multithreading feature enabled. As there is only one single
> per-domU xenwatch thread for each domid, probing the xenbus device for the
> same domid again would not create the thread for the same domid again, but
> only increment the reference count of the thread's mtwatch domain. When a
> xenbus device is removed, the reference count is decremented. The per-domU
> xenwatch thread is destroyed when the reference count of its mtwatch
> domain
> is zero, that is, al xenbus devices (whose mtwatch feature is enabled) of
> such mtwatch domain are removed.
> 
> Therefore, a domid has its own per-domU xenwatch thread only when it is
> attached with dom0 backend xenbus device whose pv driver has the feature
> enabled. The domid would not have its own xenwatch thread when it is not
> running any mtwatch-enabled xenbus device.
> 
> When a watch (with xenwatch multithreading enabled) is unregistered, we
> will generally traverse all mtwatch domains to remove all inflight pending
> events fired by such watch. However, one optimization in this patch is we
> only need to remove pending events from a specific mtwatch domain when the
> watch is registered for a specific domid, that is, when its owner_id field
> is non-zero.
> 
> Signed-off-by: Dongli Zhang <dongli.zhang@xxxxxxxxxx>
> ---
>  drivers/xen/xenbus/xenbus_probe.c |   6 +
>  drivers/xen/xenbus/xenbus_xs.c    | 273
> ++++++++++++++++++++++++++++++++++++++
>  include/xen/xenbus.h              |   3 +
>  3 files changed, 282 insertions(+)
> 
> diff --git a/drivers/xen/xenbus/xenbus_probe.c
> b/drivers/xen/xenbus/xenbus_probe.c
> index 5b47188..5755596 100644
> --- a/drivers/xen/xenbus/xenbus_probe.c
> +++ b/drivers/xen/xenbus/xenbus_probe.c
> @@ -236,6 +236,9 @@ int xenbus_dev_probe(struct device *_dev)
>       if (err)
>               goto fail;
> 
> +     if (xen_mtwatch && drv->use_mtwatch)
> +             mtwatch_create_domain(dev->otherend_id);

mtwatch_get_domain()? (Since you have mtwatch_put_domain() below).

> +
>       err = watch_otherend(dev);
>       if (err) {
>               dev_warn(&dev->dev, "watch_otherend on %s failed.\n",
> @@ -263,6 +266,9 @@ int xenbus_dev_remove(struct device *_dev)
>       if (drv->remove)
>               drv->remove(dev);
> 
> +     if (xen_mtwatch && drv->use_mtwatch)
> +             mtwatch_put_domain(dev->otherend_id);
> +
>       free_otherend_details(dev);
> 
>       xenbus_switch_state(dev, XenbusStateClosed);
> diff --git a/drivers/xen/xenbus/xenbus_xs.c
> b/drivers/xen/xenbus/xenbus_xs.c
> index 3f137d2..741dc54 100644
> --- a/drivers/xen/xenbus/xenbus_xs.c
> +++ b/drivers/xen/xenbus/xenbus_xs.c
> @@ -108,6 +108,201 @@ static __init int xen_parse_mtwatch(char *arg)
>  }
>  early_param("xen_mtwatch", xen_parse_mtwatch);
> 
> +struct mtwatch_domain *mtwatch_find_domain(domid_t domid)
> +{
> +     struct mtwatch_domain *domain;
> +     int hash = MTWATCH_HASH(domid);
> +     struct hlist_head *hash_head = &mtwatch_info->domain_hash[hash];
> +
> +     hlist_for_each_entry_rcu(domain, hash_head, hash_node) {
> +             if (domain->domid == domid)
> +                     return domain;
> +     }
> +
> +     return NULL;
> +}
> +
> +/* per-domU thread for xenwatch multithreading. */
> +static int mtwatch_thread(void *arg)
> +{
> +     struct mtwatch_domain *domain = (struct mtwatch_domain *) arg;
> +     struct list_head *ent;
> +     struct xs_watch_event *event;
> +
> +     domain->pid = current->pid;
> +
> +     for (;;) {
> +             wait_event_interruptible(domain->events_wq,
> +                                      !list_empty(&domain->events) ||
> +                                      domain->state == MTWATCH_DOMAIN_DOWN);
> +
> +             if (domain->state == MTWATCH_DOMAIN_DOWN &&
> +                 list_empty(&domain->events))
> +                     break;
> +
> +             mutex_lock(&domain->domain_mutex);
> +
> +             spin_lock(&domain->events_lock);
> +             ent = domain->events.next;
> +             if (ent != &domain->events)
> +                     list_del(ent);
> +             spin_unlock(&domain->events_lock);
> +
> +             if (ent != &domain->events) {
> +                     event = list_entry(ent, struct xs_watch_event, list);
> +                     event->handle->callback(event->handle, event->path,
> +                                             event->token);
> +                     kfree(event);
> +             }
> +
> +             mutex_unlock(&domain->domain_mutex);
> +     }
> +
> +     /*
> +      * domain->state is already set to MTWATCH_DOMAIN_DOWN (to avoid
> +      * new event to domain->events) when above for loop breaks, so
> +      * that there is no requirement to cleanup domain->events again.
> +      */
> +
> +     spin_lock(&mtwatch_info->domain_lock);
> +     list_del_rcu(&domain->list_node);
> +     spin_unlock(&mtwatch_info->domain_lock);
> +
> +     spin_lock(&mtwatch_info->purge_lock);
> +     list_add(&domain->purge_node, &mtwatch_info->purge_list);
> +     spin_unlock(&mtwatch_info->purge_lock);
> +
> +     schedule_work(&mtwatch_info->purge_work);
> +
> +     return 0;
> +}
> +
> +static void delayed_destroy_domain(struct rcu_head *head)
> +{
> +     struct mtwatch_domain *domain;
> +
> +     domain = container_of(head, struct mtwatch_domain, rcu);
> +     kfree(domain);
> +}
> +
> +static void xen_mtwatch_purge_domain(struct work_struct *work)
> +{
> +     struct mtwatch_domain *domain;
> +     struct list_head *node;
> +
> +     while (!list_empty(&mtwatch_info->purge_list)) {
> +
> +             spin_lock(&mtwatch_info->purge_lock);
> +             node = mtwatch_info->purge_list.next;
> +             if (node != &mtwatch_info->purge_list)
> +                     list_del(node);
> +             spin_unlock(&mtwatch_info->purge_lock);
> +
> +             if (node != &mtwatch_info->purge_list) {
> +                     domain = list_entry(node, struct mtwatch_domain,
> +                                         purge_node);
> +                     kthread_stop(domain->task);
> +
> +                     call_rcu(&domain->rcu, delayed_destroy_domain);
> +             }
> +     }
> +}
> +
> +/* Running in the context of default xenwatch kthread. */
> +void mtwatch_create_domain(domid_t domid)
> +{
> +     struct mtwatch_domain *domain;
> +
> +     if (!domid) {
> +             pr_err("Default xenwatch thread is for dom0\n");
> +             return;
> +     }
> +
> +     spin_lock(&mtwatch_info->domain_lock);
> +
> +     domain = mtwatch_find_domain(domid);
> +     if (domain) {
> +             atomic_inc(&domain->refcnt);
> +             spin_unlock(&mtwatch_info->domain_lock);
> +             return;
> +     }
> +
> +     domain = kzalloc(sizeof(*domain), GFP_ATOMIC);
> +     if (!domain) {
> +             spin_unlock(&mtwatch_info->domain_lock);
> +             pr_err("Failed to allocate memory for mtwatch thread %d\n",
> +                    domid);
> +             return;
> +     }
> +
> +     domain->domid = domid;
> +     atomic_set(&domain->refcnt, 1);
> +     mutex_init(&domain->domain_mutex);
> +     INIT_LIST_HEAD(&domain->purge_node);
> +
> +     init_waitqueue_head(&domain->events_wq);
> +     spin_lock_init(&domain->events_lock);
> +     INIT_LIST_HEAD(&domain->events);
> +
> +     list_add_tail_rcu(&domain->list_node, &mtwatch_info->domain_list);
> +
> +     hlist_add_head_rcu(&domain->hash_node,
> +                        &mtwatch_info->domain_hash[MTWATCH_HASH(domid)]);
> +
> +     spin_unlock(&mtwatch_info->domain_lock);
> +
> +     domain->task = kthread_run(mtwatch_thread, domain,
> +                                "xen-mtwatch-%d", domid);
> +
> +     if (!domain->task) {
> +             pr_err("mtwatch kthread creation is failed\n");
> +             domain->state = MTWATCH_DOMAIN_DOWN;
> +
> +             return;
> +     }
> +
> +     domain->state = MTWATCH_DOMAIN_UP;
> +}
> +
> +/* Running in the context of default xenwatch kthread. */
> +void mtwatch_put_domain(domid_t domid)
> +{
> +     struct mtwatch_domain *domain;
> +
> +     spin_lock(&mtwatch_info->domain_lock);
> +
> +     domain = mtwatch_find_domain(domid);
> +     if (!domain) {
> +             spin_unlock(&mtwatch_info->domain_lock);
> +             pr_err("mtwatch kthread for domid=%d does not exist\n",
> +                    domid);
> +             return;
> +     }
> +
> +     if (atomic_dec_and_test(&domain->refcnt)) {
> +
> +             hlist_del_rcu(&domain->hash_node);
> +
> +             if (!domain->task) {
> +                     /*
> +                      * As the task is failed to initialize during
> +                      * mtwatch_create_domain(), we do not need to wait
> +                      * for the kernel thread to complete.
> +                      */
> +                     list_del_rcu(&domain->list_node);
> +                     call_rcu(&domain->rcu, delayed_destroy_domain);
> +             } else {
> +                     spin_lock(&domain->events_lock);
> +                     domain->state = MTWATCH_DOMAIN_DOWN;
> +                     spin_unlock(&domain->events_lock);
> +
> +                     wake_up(&domain->events_wq);
> +             }
> +     }
> +
> +     spin_unlock(&mtwatch_info->domain_lock);
> +}
> +
>  static void xs_suspend_enter(void)
>  {
>       spin_lock(&xs_state_lock);
> @@ -793,6 +988,80 @@ int register_xenbus_watch(struct xenbus_watch *watch)
>  }
>  EXPORT_SYMBOL_GPL(register_xenbus_watch);
> 
> +static void __unregister_single_mtwatch(struct xenbus_watch *watch,
> +                                     struct mtwatch_domain *domain)
> +{
> +     struct xs_watch_event *event, *tmp;
> +
> +     if (current->pid != domain->pid)
> +             mutex_lock(&domain->domain_mutex);

Since you avoid the lock here, what's to stop another thread with a different 
pid now racing with this?

  Paul

> +
> +     spin_lock(&domain->events_lock);
> +     list_for_each_entry_safe(event, tmp,
> +                              &domain->events, list) {
> +             if (event->handle != watch)
> +                     continue;
> +             list_del(&event->list);
> +             kfree(event);
> +     }
> +     spin_unlock(&domain->events_lock);
> +
> +     if (current->pid != domain->pid)
> +             mutex_unlock(&domain->domain_mutex);
> +}
> +
> +static void unregister_single_mtwatch(struct xenbus_watch *watch,
> +                                   domid_t domid)
> +{
> +     struct mtwatch_domain *domain;
> +     bool found = false;
> +
> +     rcu_read_lock();
> +
> +     list_for_each_entry_rcu(domain, &mtwatch_info->domain_list,
> +                             list_node) {
> +             if (domain->domid == domid) {
> +                     found = true;
> +                     __unregister_single_mtwatch(watch, domain);
> +             }
> +     }
> +
> +     WARN_ON_ONCE(unlikely(!found));
> +
> +     rcu_read_unlock();
> +}
> +
> +static void unregister_all_mtwatch(struct xenbus_watch *watch)
> +{
> +     struct mtwatch_domain *domain;
> +
> +     rcu_read_lock();
> +
> +     list_for_each_entry_rcu(domain, &mtwatch_info->domain_list,
> +                             list_node) {
> +             __unregister_single_mtwatch(watch, domain);
> +     }
> +
> +     rcu_read_unlock();
> +}
> +
> +static void unregister_mtwatch(struct xenbus_watch *watch)
> +{
> +     /*
> +      * Generally, to unregister a watch. we need to traverse all
> +      * mtwatch domains to remove all inflight pending watch events for
> +      * such watch.
> +      *
> +      * One exception is we only need to remove pending watch events
> +      * from a single mtwatch domain when the watch is registered for a
> +      * specific domid.
> +      */
> +     if (watch->owner_id)
> +             unregister_single_mtwatch(watch, watch->owner_id);
> +     else
> +             unregister_all_mtwatch(watch);
> +}
> +
>  void unregister_xenbus_watch(struct xenbus_watch *watch)
>  {
>       struct xs_watch_event *event, *tmp;
> @@ -831,6 +1100,9 @@ void unregister_xenbus_watch(struct xenbus_watch
> *watch)
> 
>       if (current->pid != xenwatch_pid)
>               mutex_unlock(&xenwatch_mutex);
> +
> +     if (xen_mtwatch && watch->get_domid)
> +             unregister_mtwatch(watch);
>  }
>  EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
> 
> @@ -954,6 +1226,7 @@ int xs_init(void)
> 
>               spin_lock_init(&mtwatch_info->purge_lock);
>               INIT_LIST_HEAD(&mtwatch_info->purge_list);
> +             INIT_WORK(&mtwatch_info->purge_work,
> xen_mtwatch_purge_domain);
> 
>               xen_mtwatch = true;
> 
> diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
> index e807114..4ac2cee 100644
> --- a/include/xen/xenbus.h
> +++ b/include/xen/xenbus.h
> @@ -241,6 +241,9 @@ extern const struct file_operations xen_xenbus_fops;
>  extern struct xenstore_domain_interface *xen_store_interface;
>  extern int xen_store_evtchn;
> 
> +void mtwatch_create_domain(domid_t domid);
> +void mtwatch_put_domain(domid_t domid);
> +
>  extern bool xen_mtwatch;
> 
>  #define MTWATCH_HASH_SIZE 256
> --
> 2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.