[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 3/3] Xen physical cpus interface



On Thu, Apr 19, 2012 at 01:33:03PM +0000, Liu, Jinsong wrote:
> >From afdd30a7769e706e9be64f80d64136e2e267ecb9 Mon Sep 17 00:00:00 2001
> From: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
> Date: Fri, 20 Apr 2012 05:11:58 +0800
> Subject: [PATCH 3/3] Xen physical cpus interface
> 
> Manage physical cpus in dom0, get physical cpus info and provide sys 
> interface.

Anything that exposes SysFS attributes needs documentation in 
Documentation/ABI

Can you explain what this solves? And if there are any
userland applications that use this?


> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
> Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>
> ---
>  drivers/xen/Makefile             |    1 +
>  drivers/xen/pcpu.c               |  393 
> ++++++++++++++++++++++++++++++++++++++
>  include/xen/interface/platform.h |   10 +-
>  include/xen/interface/xen.h      |    1 +
>  4 files changed, 404 insertions(+), 1 deletions(-)
>  create mode 100644 drivers/xen/pcpu.c
> 
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 1d3e763..d12d14d 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_XEN_PVHVM)                     += 
> platform-pci.o
>  obj-$(CONFIG_XEN_TMEM)                       += tmem.o
>  obj-$(CONFIG_SWIOTLB_XEN)            += swiotlb-xen.o
>  obj-$(CONFIG_XEN_DOM0)                       += pci.o
> +obj-$(CONFIG_XEN_DOM0)                       += pcpu.o
>  obj-$(CONFIG_XEN_PCIDEV_BACKEND)     += xen-pciback/
>  obj-$(CONFIG_XEN_PRIVCMD)            += xen-privcmd.o
>  obj-$(CONFIG_XEN_ACPI_PROCESSOR)     += xen-acpi-processor.o
> diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
> new file mode 100644
> index 0000000..9295def
> --- /dev/null
> +++ b/drivers/xen/pcpu.c
> @@ -0,0 +1,393 @@
> +/******************************************************************************
> + * pcpu.c
> + * Management physical cpu in dom0, get pcpu info and provide sys interface
> + *
> + * Copyright (c) 2012 Intel Corporation
> + * Author: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
> + * Author: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation; or, when distributed
> + * separately from the Linux kernel or incorporated into other
> + * software packages, subject to the following license:
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this source file (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use, copy, modify,
> + * merge, publish, distribute, sublicense, and/or sell copies of the 
> Software,
> + * and to permit persons to whom the Software is furnished to do so, subject 
> to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/spinlock.h>
> +#include <linux/cpu.h>
> +#include <linux/stat.h>
> +#include <xen/xen.h>
> +#include <xen/xenbus.h>
> +#include <xen/events.h>
> +#include <xen/interface/platform.h>
> +#include <asm/xen/hypervisor.h>
> +#include <asm/xen/hypercall.h>
> +
> +struct pcpu {
> +     struct list_head pcpu_list;
> +     struct device dev;
> +     uint32_t xen_id;

What is a 'xen_id'? Can you put a provide a comment
explaining the relationship between that and apic_id acpi_id
and flags?

Or better yet, just at the start of the structure have:

/*
 * @xen_id The ...
 * @apic_id ... so on.


> +     uint32_t apic_id;
> +     uint32_t acpi_id;
> +     uint32_t flags;
> +};
> +
> +static struct bus_type xen_pcpu_subsys = {

> +     .name = "xen_cpu",
> +     .dev_name = "xen_cpu",
> +};
> +
> +static DEFINE_MUTEX(xen_pcpu_lock);
> +#define get_pcpu_lock() mutex_lock(&xen_pcpu_lock);

Just use the mutex - no point of using the #define


> +#define put_pcpu_lock() mutex_unlock(&xen_pcpu_lock);
> +
> +#define XEN_PCPU "xen_cpu: "
> +
> +struct xen_pcpus {
> +     struct list_head list;

Uh, so it is just a list? 
> +};
> +static struct xen_pcpus xen_pcpus;

Can't you just do

static struct pcpu * xen_pcpus;

And then use that to add/remove items?

> +
> +static int xen_pcpu_down(uint32_t xen_id)
> +{
> +     int ret;
> +     struct xen_platform_op op = {
> +             .cmd                    = XENPF_cpu_offline,
> +             .interface_version      = XENPF_INTERFACE_VERSION,
> +             .u.cpu_ol.cpuid         = xen_id,
> +     };
> +
> +     ret = HYPERVISOR_dom0_op(&op);
> +     return ret;

Just collapse it in:
        return HYPERVISOR_dom0_op..

> +}
> +
> +static int xen_pcpu_up(uint32_t xen_id)
> +{
> +     int ret;
> +     struct xen_platform_op op = {
> +             .cmd                    = XENPF_cpu_online,
> +             .interface_version      = XENPF_INTERFACE_VERSION,
> +             .u.cpu_ol.cpuid         = xen_id,
> +     };
> +
> +     ret = HYPERVISOR_dom0_op(&op);
> +     return ret;

Ditto.

> +}
> +
> +static ssize_t show_online(struct device *dev,
> +                        struct device_attribute *attr,
> +                        char *buf)
> +{
> +     struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +
> +     return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE));
> +}
> +
> +static ssize_t __ref store_online(struct device *dev,
> +                               struct device_attribute *attr,
> +                               const char *buf, size_t count)
> +{
> +     struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +     ssize_t ret;
> +

Add:
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;


> +     switch (buf[0]) {

Use strict_strtoull pls.

> +     case '0':
> +             ret = xen_pcpu_down(cpu->xen_id);
> +             break;
> +     case '1':
> +             ret = xen_pcpu_up(cpu->xen_id);
> +             break;
> +     default:
> +             ret = -EINVAL;
> +     }
> +
> +     if (ret >= 0)
> +             ret = count;
> +     return ret;
> +}
> +static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);

> +
> +static ssize_t show_apicid(struct device *dev,
> +                        struct device_attribute *attr,
> +                        char *buf)
> +{
> +     struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +
> +     return sprintf(buf, "%u\n", cpu->apic_id);
> +}
> +
> +static ssize_t show_acpiid(struct device *dev,
> +                        struct device_attribute *attr,
> +                        char *buf)
> +{
> +     struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +
> +     return sprintf(buf, "%u\n", cpu->acpi_id);
> +}
> +static DEVICE_ATTR(apic_id, S_IRUGO, show_apicid, NULL);
> +static DEVICE_ATTR(acpi_id, S_IRUGO, show_acpiid, NULL);

What benefit is there in exposing these values to the user?

> +
> +static bool xen_pcpu_online(uint32_t flags)
> +{
> +     return !!(flags & XEN_PCPU_FLAGS_ONLINE);
> +}
> +
> +static void pcpu_online_status(struct xenpf_pcpuinfo *info,
> +                            struct pcpu *pcpu)
> +{
> +     if (xen_pcpu_online(info->flags) &&
> +        !xen_pcpu_online(pcpu->flags)) {
> +             /* the pcpu is onlined */
> +             pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
> +             kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
> +     } else if (!xen_pcpu_online(info->flags) &&
> +                 xen_pcpu_online(pcpu->flags)) {
> +             /* The pcpu is offlined */
> +             pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
> +             kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
> +     }
> +}
> +
> +static struct pcpu *get_pcpu(uint32_t xen_id)
> +{
> +     struct pcpu *pcpu = NULL;
> +
> +     list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list) {
> +             if (pcpu->xen_id == xen_id)
> +                     return pcpu;
> +     }
> +     return NULL;
> +}
> +
> +static void pcpu_sys_remove(struct pcpu *pcpu)
> +{
> +     struct device *dev;
> +
> +     if (!pcpu)
> +             return;
> +
> +     dev = &pcpu->dev;
> +     if (dev->id)
> +             device_remove_file(dev, &dev_attr_online);
> +     device_remove_file(dev, &dev_attr_acpi_id);
> +     device_remove_file(dev, &dev_attr_apic_id);
> +     device_unregister(dev);

So .. if you are using that, can't you use the .release
and define something like:

static void pcpu_release(struct device *dev)
{
        struct pcpu *pcpu = container_of(dev, struct pcpu, dev);

        list_del(&pcpu->pcpu_list);
        kfree(pcpu);
}
> +}
> +
> +static void free_pcpu(struct pcpu *pcpu)
> +{
> +     if (!pcpu)
> +             return;
> +
> +     pcpu_sys_remove(pcpu);

> +     list_del(&pcpu->pcpu_list);
> +     kfree(pcpu);

Those two shouldn't be there. They sould be part of the
release function.
> +}
> +
> +static int pcpu_sys_create(struct pcpu *pcpu)
> +{
> +     struct device *dev;
> +     int err = -EINVAL;
> +
> +     if (!pcpu)
> +             return err;
> +
> +     dev = &pcpu->dev;
> +     dev->bus = &xen_pcpu_subsys;
> +     dev->id = pcpu->xen_id;

And then here dev->release = &pcpu_release;

> +
> +     err = device_register(dev);
> +     if (err)
> +             goto err1;
> +
> +     err = device_create_file(dev, &dev_attr_apic_id);
> +     if (err)
> +             goto err2;
> +     err = device_create_file(dev, &dev_attr_acpi_id);
> +     if (err)
> +             goto err3;

Usually this is done with an array, like the drivers/xen/xen-balloon.c
..
But I am still not sure why these two parameters are needed?

> +     /* Not open pcpu0 online access to user */

Huh? You mean "Nobody can touch PCPU0" ?

Why? Why can they touch the other ones? And better yet,
what happens if one boots without "dom0_max_vcpus=X"
and powers of some of the CPUs?

> +     if (dev->id) {
> +             err = device_create_file(dev, &dev_attr_online);
> +             if (err)
> +                     goto err4;
> +     }
> +
> +     return 0;
> +
> +err4:
> +     device_remove_file(dev, &dev_attr_acpi_id);
> +err3:
> +     device_remove_file(dev, &dev_attr_apic_id);
> +err2:
> +     device_unregister(dev);
> +err1:
> +     return err;
> +}
> +
> +static struct pcpu *init_pcpu(struct xenpf_pcpuinfo *info)
> +{
> +     struct pcpu *pcpu;
> +     int err;
> +
> +     if (info->flags & XEN_PCPU_FLAGS_INVALID)
> +             return ERR_PTR(-ENODEV);
> +
> +     pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
> +     if (!pcpu)
> +             return ERR_PTR(-ENOMEM);
> +
> +     INIT_LIST_HEAD(&pcpu->pcpu_list);
> +     pcpu->xen_id = info->xen_cpuid;
> +     pcpu->apic_id = info->apic_id;
> +     pcpu->acpi_id = info->acpi_id;
> +     pcpu->flags = info->flags;
> +
> +     err = pcpu_sys_create(pcpu);
> +     if (err) {
> +             pr_warning(XEN_PCPU "Failed to register pcpu%d\n",

Not %u?
> +                        info->xen_cpuid);
> +             kfree(pcpu);
> +             return ERR_PTR(-ENOENT);
> +     }
> +

Should this be protected by the mutex? [edit: I see now
that the calleer is suppose to hold on the lock.
Mention that please right before you do any list manipulations]


> +     list_add_tail(&pcpu->pcpu_list, &xen_pcpus.list);
> +     return pcpu;
> +}
> +
> +/*
> + * Caller should hold the pcpu lock

Provide full name of the mutex lock.
> + */
> +static int _sync_pcpu(uint32_t cpu_num, uint32_t *ptr_max_cpu_num)

Why the '_' in front of the name?

For parameters do:

uint32 cpu, uint32 *max_cpu

> +{
> +     int ret;
> +     struct pcpu *pcpu = NULL;
> +     struct xenpf_pcpuinfo *info;
> +     struct xen_platform_op op = {
> +             .cmd                   = XENPF_get_cpuinfo,
> +             .interface_version     = XENPF_INTERFACE_VERSION,
> +             .u.pcpu_info.xen_cpuid = cpu_num,
> +     };
> +
> +     ret = HYPERVISOR_dom0_op(&op);
> +     if (ret)
> +             return ret;
> +
> +     info = &op.u.pcpu_info;
> +     if (ptr_max_cpu_num)
> +             *ptr_max_cpu_num = info->max_present;
> +
> +     pcpu = get_pcpu(cpu_num);
> +
> +     if (info->flags & XEN_PCPU_FLAGS_INVALID) {
> +             /* The pcpu has been removed */
> +             if (pcpu)
> +                     free_pcpu(pcpu);
> +             return 0;
> +     }
> +
> +     if (!pcpu) {
> +             pcpu = init_pcpu(info);
> +             if (IS_ERR_OR_NULL(pcpu))
> +                     return -ENODEV;
> +     } else
> +             pcpu_online_status(info, pcpu);
> +
> +     return 0;
> +}
> +
> +/*
> + * Sync dom0's pcpu information with xen hypervisor's
> + */
> +static int xen_sync_pcpus(void)
> +{
> +     /*
> +      * Boot cpu always have cpu_id 0 in xen
> +      */
> +     uint32_t cpu_num = 0, max_cpu_num = 0;

Use 'cpu' and 'max_cpu'

> +     int err = 0;
> +     struct list_head *elem, *tmp;
> +     struct pcpu *pcpu;
> +
> +     get_pcpu_lock();
> +
> +     while (!err && (cpu_num <= max_cpu_num)) {
> +             err = _sync_pcpu(cpu_num, &max_cpu_num);
> +             cpu_num++;
> +     }
> +
> +     if (err) {
> +             list_for_each_safe(elem, tmp, &xen_pcpus.list) {

s/elem/pcpu/

> +                     pcpu = list_entry(elem, struct pcpu, pcpu_list);

That you can remove once you make the xen_pcpus structure as I mentioned
above.

> +                     free_pcpu(pcpu);
> +             }
> +     }
> +
> +     put_pcpu_lock();

Ah, so you are locking around here.
> +
> +     return err;
> +}
> +
> +static void xen_pcpu_dpc(struct work_struct *work)

s/dpc/workqueue/

> +{
> +     xen_sync_pcpus();
> +}
> +static DECLARE_WORK(xen_pcpu_work, xen_pcpu_dpc);
> +
> +static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
> +{
> +     schedule_work(&xen_pcpu_work);
> +     return IRQ_HANDLED;
> +}
> +
> +static int __init xen_pcpu_init(void)
> +{
> +     int ret;
> +
> +     if (!xen_initial_domain())
> +             return -ENODEV;
> +
> +     ret = subsys_system_register(&xen_pcpu_subsys, NULL);
> +     if (ret) {
> +             pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
> +             return ret;
> +     }
> +
> +     INIT_LIST_HEAD(&xen_pcpus.list);
> +
> +     ret = xen_sync_pcpus();
> +     if (ret) {
> +             pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
> +             return ret;
> +     }
> +
> +     ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
> +                                   xen_pcpu_interrupt, 0,
> +                                   "pcpu", NULL);

"xen-pcpu"

> +     if (ret < 0) {
> +             pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");

Shouldn't you delete what 'xen_sync_pcpus' did?
Or is it OK to still work without the interrupts? What is the purpose
of that interrupt? How does it actually work - the hypervisor
decides when/where to turn off CPUs?

> +             return ret;
> +     }
> +
> +     return 0;
> +}
> +arch_initcall(xen_pcpu_init);
> diff --git a/include/xen/interface/platform.h 
> b/include/xen/interface/platform.h
> index 486653f..2c56d4f 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -298,7 +298,7 @@ struct xenpf_set_processor_pminfo {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
>  
> -#define XENPF_get_cpuinfo 55
> +#define XENPF_get_cpuinfo    55

Why?

>  struct xenpf_pcpuinfo {
>       /* IN */
>       uint32_t xen_cpuid;
> @@ -314,6 +314,13 @@ struct xenpf_pcpuinfo {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo);
>  
> +#define XENPF_cpu_online     56
> +#define XENPF_cpu_offline    57
> +struct xenpf_cpu_ol {
> +     uint32_t cpuid;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
> +
>  struct xen_platform_op {
>       uint32_t cmd;
>       uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
> @@ -330,6 +337,7 @@ struct xen_platform_op {
>               struct xenpf_getidletime       getidletime;
>               struct xenpf_set_processor_pminfo set_pminfo;
>               struct xenpf_pcpuinfo          pcpu_info;
> +             struct xenpf_cpu_ol            cpu_ol;
>               uint8_t                        pad[128];
>       } u;
>  };
> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> index a890804..0801468 100644
> --- a/include/xen/interface/xen.h
> +++ b/include/xen/interface/xen.h
> @@ -80,6 +80,7 @@
>  #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
>  #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
>  #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
> +#define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */
>  
>  /* Architecture-specific VIRQ definitions. */
>  #define VIRQ_ARCH_0    16
> -- 
> 1.7.1



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.