[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [Xen-devel] Re: [PATCH] Add hypercall to expose physical CPU in xen hypervisor



Jeremy, here is the update patch according to the new interface, please have a 
look.
One change in this patch is that the apic_id/acpi_id is also exposed in sysfs.

Thanks
yunhong Jiang

Export host physical CPU information to dom0

From: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>

This patch exposes the host's physical CPU information to dom0 in sysfs, so that
dom0's management tools can control the physical CPUs if needed.

It also provides an interface in sysfs to logically online/offline a physical CPU.

Notice: The information in dom0 is synced with xen hypervisor asynchronously.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>
---

 drivers/xen/Makefile             |    2
 drivers/xen/pcpu.c               |  462 ++++++++++++++++++++++++++++++++++++++
 include/xen/interface/platform.h |   48 ++++
 include/xen/interface/xen.h      |    1
 include/xen/pcpu.h               |   30 ++
 5 files changed, 542 insertions(+), 1 deletions(-)
 create mode 100644 drivers/xen/pcpu.c
 create mode 100644 include/xen/pcpu.h


diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index cddfffb..5d01856 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,4 @@
-obj-y  += grant-table.o features.o events.o manage.o biomerge.o
+obj-y  += grant-table.o features.o events.o manage.o biomerge.o pcpu.o
 obj-y  += xenbus/ live_maps.o

 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
new file mode 100644
index 0000000..fc2f70f
--- /dev/null
+++ b/drivers/xen/pcpu.c
@@ -0,0 +1,462 @@
+/*
+ * pcpu.c - management physical cpu in dom0 environment
+ */
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+#include <linux/cpu.h>
+#include <xen/xenbus.h>
+#include <xen/pcpu.h>
+#include <xen/events.h>
+#include <xen/acpi.h>
+
+static struct sysdev_class xen_pcpu_sysdev_class = {
+       .name = "xen_pcpu",
+};
+
+static DEFINE_SPINLOCK(xen_pcpu_spinlock);
+static RAW_NOTIFIER_HEAD(xen_pcpu_chain);
+
+/*
+ * No need for irq disable since hotplug notify is in workqueue context.
+ * Note: no trailing semicolon in the expansion -- the caller supplies
+ * it.  A semicolon inside the macro would make "get_pcpu_lock();"
+ * expand to two statements and break unbraced if/else bodies.
+ */
+#define get_pcpu_lock()  spin_lock(&xen_pcpu_spinlock)
+#define put_pcpu_lock()  spin_unlock(&xen_pcpu_spinlock)
+
+/* All known physical cpus; list and counters protected by the lock above */
+struct xen_pcpus {
+       struct list_head list;
+       int possible;           /* max possible cpus, from the hypervisor */
+       int present;            /* number of valid entries on .list */
+};
+static struct xen_pcpus xen_pcpus;
+
+int register_xen_pcpu_notifier(struct notifier_block *nb)
+{
+       int rc;
+
+       /* The notifier chain is protected by the pcpu lock. */
+       get_pcpu_lock();
+       rc = raw_notifier_chain_register(&xen_pcpu_chain, nb);
+       put_pcpu_lock();
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(register_xen_pcpu_notifier);
+
+void unregister_xen_pcpu_notifier(struct notifier_block *nb)
+{
+       /* Mirror of register_xen_pcpu_notifier: chain under pcpu lock. */
+       get_pcpu_lock();
+       raw_notifier_chain_unregister(&xen_pcpu_chain, nb);
+       put_pcpu_lock();
+}
+EXPORT_SYMBOL_GPL(unregister_xen_pcpu_notifier);
+
+/* Ask the hypervisor to take one physical cpu offline. */
+static int xen_pcpu_down(uint32_t xen_id)
+{
+       xen_platform_op_t op = {
+               .cmd                       = XENPF_resource_hotplug,
+               .interface_version         = XENPF_INTERFACE_VERSION,
+               .u.resource.sub_cmd        = XEN_CPU_offline,
+               .u.resource.u.cpu_ol.cpuid = xen_id,
+       };
+
+       return HYPERVISOR_dom0_op(&op);
+}
+
+/* Ask the hypervisor to bring one physical cpu online. */
+static int xen_pcpu_up(uint32_t xen_id)
+{
+       xen_platform_op_t op = {
+               .cmd                       = XENPF_resource_hotplug,
+               .interface_version         = XENPF_INTERFACE_VERSION,
+               .u.resource.sub_cmd        = XEN_CPU_online,
+               .u.resource.u.cpu_ol.cpuid = xen_id,
+       };
+
+       return HYPERVISOR_dom0_op(&op);
+}
+
+/* sysfs read: "1\n" if the cached flags say the pcpu is online, else "0\n" */
+static ssize_t show_online(struct sys_device *dev,
+                       struct sysdev_attribute *attr,
+                       char *buf)
+{
+       struct pcpu *pc = container_of(dev, struct pcpu, sysdev);
+       int online = xen_pcpu_online(pc->flags);
+
+       return sprintf(buf, "%u\n", online);
+}
+
+/* sysfs write: '0' requests offline, '1' requests online, anything else
+ * is rejected.  The actual state change comes back asynchronously via
+ * VIRQ_PCPU_STATE, so the cached flags are not touched here. */
+static ssize_t __ref store_online(struct sys_device *dev,
+                                 struct sysdev_attribute *attr,
+                                 const char *buf, size_t count)
+{
+       struct pcpu *pc = container_of(dev, struct pcpu, sysdev);
+       ssize_t ret;
+
+       if (buf[0] == '0')
+               ret = xen_pcpu_down(pc->xen_id);
+       else if (buf[0] == '1')
+               ret = xen_pcpu_up(pc->xen_id);
+       else
+               ret = -EINVAL;
+
+       /* On success report the whole write as consumed. */
+       return (ret < 0) ? ret : (ssize_t)count;
+}
+
+static SYSDEV_ATTR(online, 0644, show_online, store_online);
+
+/* sysfs read: the apic id the hypervisor reported for this pcpu */
+static ssize_t show_apicid(struct sys_device *dev,
+                       struct sysdev_attribute *attr,
+                       char *buf)
+{
+       struct pcpu *pc = container_of(dev, struct pcpu, sysdev);
+       return sprintf(buf, "%u\n", pc->apic_id);
+}
+
+/* sysfs read: the acpi id the hypervisor reported for this pcpu */
+static ssize_t show_acpiid(struct sys_device *dev,
+                       struct sysdev_attribute *attr,
+                       char *buf)
+{
+       struct pcpu *pc = container_of(dev, struct pcpu, sysdev);
+       return sprintf(buf, "%u\n", pc->acpi_id);
+}
+
+/* apic_id/acpi_id are read-only */
+static SYSDEV_ATTR(apic_id, 0444, show_apicid, NULL);
+static SYSDEV_ATTR(acpi_id, 0444, show_acpiid, NULL);
+
+/*
+ * Undo xen_pcpu_add: remove the sysfs attributes, unregister the
+ * sysdev, unlink from xen_pcpus.list and free the entry.
+ * Caller must hold the pcpu lock.  Safe to call with NULL.
+ */
+static int xen_pcpu_free(struct pcpu *pcpu)
+{
+       if (!pcpu)
+               return 0;
+
+       /* Remove every attribute created in xen_pcpu_add, not only
+        * "online", before dropping the sysdev itself. */
+       sysdev_remove_file(&pcpu->sysdev, &attr_online);
+       sysdev_remove_file(&pcpu->sysdev, &attr_apic_id);
+       sysdev_remove_file(&pcpu->sysdev, &attr_acpi_id);
+       sysdev_unregister(&pcpu->sysdev);
+       list_del(&pcpu->pcpu_list);
+       kfree(pcpu);
+
+       return 0;
+}
+
+/*
+ * Allocate and register a sysdev entry for one hypervisor-reported
+ * physical cpu and link it on xen_pcpus.list.
+ * Returns the new pcpu or NULL on failure.  Caller holds the pcpu lock.
+ */
+static struct pcpu *xen_pcpu_add(struct xen_physical_cpuinfo *info)
+{
+       struct pcpu *cpu;
+       int error;
+
+       printk(KERN_DEBUG "xen_pcpu_add:  xen_id %x apic_id %x acpi_id %x\n",
+                          info->xen_cpuid, info->apic_id, info->acpi_id);
+
+       if (info->flags & XEN_PCPU_FLAGS_INVALID)
+               return NULL;
+
+       cpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
+       if (!cpu)
+               return NULL;
+
+       INIT_LIST_HEAD(&cpu->pcpu_list);
+       cpu->xen_id = info->xen_cpuid;
+       cpu->apic_id = info->apic_id;
+       cpu->acpi_id = info->acpi_id;
+       cpu->flags = info->flags;
+
+       cpu->sysdev.cls = &xen_pcpu_sysdev_class;
+       cpu->sysdev.id = info->xen_cpuid;
+
+       error = sysdev_register(&cpu->sysdev);
+       if (error) {
+               printk(KERN_WARNING "xen_pcpu_add: Failed to register pcpu\n");
+               kfree(cpu);
+               return NULL;
+       }
+
+       /* Don't ignore attribute-creation failures; unregistering the
+        * sysdev also removes any files that were created. */
+       error = sysdev_create_file(&cpu->sysdev, &attr_online);
+       if (!error)
+               error = sysdev_create_file(&cpu->sysdev, &attr_apic_id);
+       if (!error)
+               error = sysdev_create_file(&cpu->sysdev, &attr_acpi_id);
+       if (error) {
+               printk(KERN_WARNING
+                       "xen_pcpu_add: Failed to create pcpu sysfs file\n");
+               sysdev_unregister(&cpu->sysdev);
+               kfree(cpu);
+               return NULL;
+       }
+
+       list_add_tail(&cpu->pcpu_list, &xen_pcpus.list);
+
+       return cpu;
+}
+
+/*
+ * Query the hypervisor for the physical cpu table.
+ *
+ * Two hypercalls are made: the first (NULL buffer) only fetches
+ * max_present so a suitably sized buffer can be allocated; the second
+ * fills it.  On success returns a kcalloc'd array (caller must kfree)
+ * and stores the entry count in *num and the maximum possible cpu
+ * count in *possible.  Returns NULL on any failure.
+ */
+static struct xen_physical_cpuinfo *xen_pcpu_info_fetch(int *num,
+               int *possible)
+{
+       int cpu_num, ret = 0, i;
+       struct xen_physical_cpuinfo *info;
+       xen_platform_op_t op = {
+               .cmd            = XENPF_get_cpuinfo,
+               .interface_version  = XENPF_INTERFACE_VERSION,
+               .u.pcpu_info.info_num   = 0,
+       };
+
+       set_xen_guest_handle(op.u.pcpu_info.info, NULL);
+
+       ret = HYPERVISOR_dom0_op(&op);
+       if (ret)
+               return NULL;
+
+       /* NOTE(review): max_present is documented as the max present
+        * cpu_id; if it is an id rather than a count this needs to be
+        * max_present + 1 -- confirm against the hypervisor side. */
+       cpu_num = op.u.pcpu_info.max_present;
+
+       /* kcalloc zeroes the buffer and checks n * size for overflow */
+       info = kcalloc(cpu_num, sizeof(struct xen_physical_cpuinfo),
+               GFP_KERNEL);
+       if (!info)
+               return NULL;
+
+       op.u.pcpu_info.info_num = cpu_num;
+       for (i = 0; i < cpu_num; i++)
+               info[i].xen_cpuid = i;
+
+       set_xen_guest_handle(op.u.pcpu_info.info, info);
+
+       ret = HYPERVISOR_dom0_op(&op);
+       if (ret) {
+               kfree(info);
+               printk(KERN_WARNING "xen_pcpu_info_fetch: Error fetch pcpu info\n");
+               return NULL;
+       }
+
+       /* present cpus is changed, that mean hotplug may happen; the
+        * next VIRQ_PCPU_STATE notification will trigger a re-sync */
+       if (cpu_num != op.u.pcpu_info.max_present)
+               printk(KERN_WARNING "Max present cpus changed when fetch cpu info\n");
+
+       if (possible)
+               *possible = op.u.pcpu_info.max_possible;
+       if (num)
+               *num = cpu_num;
+
+       return info;
+}
+
+/* Identity of a pcpu is the (xen cpuid, apic id) pair. */
+static inline int same_pcpu(struct xen_physical_cpuinfo *info,
+                           struct pcpu *pcpu)
+{
+       if (pcpu->xen_id != info->xen_cpuid)
+               return 0;
+       return pcpu->apic_id == info->apic_id;
+}
+
+/*
+ * Reconcile the cached online flag with the hypervisor-reported one,
+ * emitting a uevent and a notifier call on any transition.
+ * Return 1 if online status changed, 0 otherwise.
+ */
+static int xen_pcpu_online_check(struct xen_physical_cpuinfo *info,
+                                struct pcpu *pcpu)
+{
+       int was_online, is_online;
+
+       if (!same_pcpu(info, pcpu))
+               return 0;
+
+       was_online = xen_pcpu_online(pcpu->flags);
+       is_online = xen_pcpu_online(info->flags);
+       if (was_online == is_online)
+               return 0;
+
+       if (is_online) {
+               /* the pcpu is onlined */
+               pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
+               kobject_uevent(&pcpu->sysdev.kobj, KOBJ_ONLINE);
+               raw_notifier_call_chain(&xen_pcpu_chain,
+                       XEN_PCPU_ONLINE, (void *)(long)pcpu->xen_id);
+       } else {
+               /* The pcpu is offlined now */
+               pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
+               kobject_uevent(&pcpu->sysdev.kobj, KOBJ_OFFLINE);
+               raw_notifier_call_chain(&xen_pcpu_chain,
+                       XEN_PCPU_OFFLINE, (void *)(long)pcpu->xen_id);
+       }
+
+       return 1;
+}
+
+/*
+ * Sync dom0's pcpu information with xen hypervisor's
+ *
+ * Fetches the current physical cpu table and reconciles it with the
+ * local xen_pcpus list: existing entries have their online state
+ * re-checked, newly reported cpus are added, and entries no longer
+ * reported are removed.  Each transition fires the corresponding
+ * XEN_PCPU_* notifier.  Returns 0 on success, -1 on failure.
+ */
+static int xen_pcpu_sync(void)
+{
+       struct xen_physical_cpuinfo *info;
+       int cpu_num, i, ret = -1, possible, present = 0;
+       struct list_head *elem, *tmp;
+       struct pcpu *pcpu;
+
+       info = xen_pcpu_info_fetch(&cpu_num, &possible);
+       if (!info)
+               return -1;
+
+       get_pcpu_lock();
+
+       xen_pcpus.possible = possible;
+
+       /* Check for current cpu list.  PCPU_LOOPED marks entries seen
+        * in this pass; anything left unmarked afterwards has vanished
+        * from the hypervisor's table and must be removed. */
+       for (i = 0; i < cpu_num; i++) {
+               int found = 0;
+
+               if (info[i].flags & XEN_PCPU_FLAGS_INVALID)
+                       continue;
+
+               present++;
+               list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list) {
+                       if (same_pcpu(&info[i], pcpu)) {
+                               xen_pcpu_online_check(&info[i], pcpu);
+                               pcpu->status |= PCPU_LOOPED;
+                               found = 1;
+                       }
+               }
+               if (!found) {
+                       struct pcpu *cpu;
+
+                       /*
+                        * Normally a physical cpu is onlined in two step
+                        * Firstly, the physical CPU is hot-added, secondly
+                        * it will be take online through sysfs writing
+                        * So a hot-added CPU should be offlined initially
+                        */
+                       if (xen_pcpu_online(info[i].flags))
+                               printk(KERN_WARNING "xen_pcpu_sync:"
+                                       "A hotadd cpu is onlined also\n");
+
+                       cpu = xen_pcpu_add(&info[i]);
+                       if (cpu == NULL)
+                               goto failed;
+                       raw_notifier_call_chain(&xen_pcpu_chain,
+                                       XEN_PCPU_ADD,
+                                       (void *)(long)cpu->xen_id);
+                       cpu->status |= PCPU_LOOPED;
+               }
+       }
+
+       /* Second pass: clear the marks, remove the unmarked.
+        * NOTE(review): on the failed path above, marks set so far are
+        * never cleared, so a vanished cpu may survive one extra sync
+        * cycle before being removed -- confirm this is acceptable. */
+       list_for_each_safe(elem, tmp, &xen_pcpus.list) {
+               pcpu = list_entry(elem, struct pcpu, pcpu_list);
+               if (pcpu->status & PCPU_LOOPED)
+                       pcpu->status &= ~PCPU_LOOPED;
+               else {
+                       /* The pcpu does not exist any more, remove it */
+                       raw_notifier_call_chain(&xen_pcpu_chain,
+                                       XEN_PCPU_REMOVE,
+                                       (void *)(long)pcpu->xen_id);
+                       xen_pcpu_free(pcpu);
+               }
+       }
+
+       xen_pcpus.present = present;
+       ret = 0;
+failed:
+       put_pcpu_lock();
+       kfree(info);
+       return ret;
+}
+
+/*
+ * One-time population of xen_pcpus from the hypervisor's cpu table.
+ * Returns 0 on success, a negative errno on failure (kernel
+ * convention, rather than a bare -1); on failure the list is left
+ * empty and everything added so far is torn down.
+ */
+static int __init xen_pcpu_info_init(void)
+{
+       int possible, cpu_num, i;
+       struct xen_physical_cpuinfo *info;
+       struct list_head *elem, *tmp;
+       struct pcpu *pcpu;
+
+       info = xen_pcpu_info_fetch(&cpu_num, &possible);
+       if (!info) {
+               printk(KERN_WARNING
+                       "xen_pcpu_info_init: Failed to fetch pcpu_info\n");
+               return -ENODEV;
+       }
+
+       get_pcpu_lock();
+
+       INIT_LIST_HEAD(&xen_pcpus.list);
+       xen_pcpus.possible = possible;
+       xen_pcpus.present = 0;
+
+       for (i = 0; i < cpu_num; i++) {
+               if (info[i].flags & XEN_PCPU_FLAGS_INVALID)
+                       continue;
+               pcpu = xen_pcpu_add(&info[i]);
+               if (!pcpu)
+                       goto failed;
+               xen_pcpus.present++;
+       }
+
+       put_pcpu_lock();
+       kfree(info);
+
+       return 0;
+failed:
+       /* Tear down everything added so far and leave an empty list. */
+       list_for_each_safe(elem, tmp, &xen_pcpus.list) {
+               pcpu = list_entry(elem, struct pcpu, pcpu_list);
+               xen_pcpu_free(pcpu);
+       }
+       xen_pcpus.possible = xen_pcpus.present = 0;
+       INIT_LIST_HEAD(&xen_pcpus.list);
+       put_pcpu_lock();
+
+       kfree(info);
+       return -ENOMEM;
+}
+
+/* Workqueue body: re-sync with the hypervisor outside irq context. */
+static void xen_pcpu_dpc(struct work_struct *work)
+{
+       int rc = xen_pcpu_sync();
+
+       if (rc < 0)
+               printk(KERN_WARNING
+                       "xen_pcpu_dpc: Failed to sync pcpu information\n");
+}
+static DECLARE_WORK(xen_pcpu_work, xen_pcpu_dpc);
+
+/*
+ * Hot-plug notification hook, presumably called from the ACPI cpu
+ * hotplug path (TODO confirm caller); type is HOTPLUG_TYPE_ADD or
+ * HOTPLUG_TYPE_REMOVE (the old "0 add, 1 remove" comment was stale).
+ * Re-syncs with the hypervisor and warns when the hypervisor's view
+ * disagrees with the requested operation.  Always returns 0.
+ */
+int xen_pcpu_hotplug(int type, uint32_t apic_id)
+{
+       struct pcpu *pcpu;
+       int found = 0;
+
+       /* Don't silently ignore a failed sync: the check below would
+        * then run against stale data. */
+       if (xen_pcpu_sync() < 0)
+               printk(KERN_WARNING
+                       "xen_pcpu_hotplug: Failed to sync pcpu information\n");
+
+       get_pcpu_lock();
+       list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list)
+       {
+               if (pcpu->apic_id == apic_id) {
+                       found = 1;
+                       break;
+               }
+       }
+       put_pcpu_lock();
+
+       if (!found && (type == HOTPLUG_TYPE_ADD))
+               printk(KERN_WARNING "The cpu is not added into Xen HV?\n");
+
+       if (found && (type == HOTPLUG_TYPE_REMOVE))
+               printk(KERN_WARNING "The cpu still exists in Xen HV?\n");
+       return 0;
+}
+EXPORT_SYMBOL(xen_pcpu_hotplug);
+
+/* VIRQ_PCPU_STATE handler: defer the hypercall-heavy re-sync to a
+ * workqueue, since it cannot be done in interrupt context. */
+static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
+{
+       schedule_work(&xen_pcpu_work);
+       return IRQ_HANDLED;
+}
+
+/*
+ * Module init: register the sysdev class, populate the pcpu list and
+ * bind the state-change virq.  Returns 0 on success or a negative
+ * errno.  Fixes in this version: the original returned the positive
+ * irq number from bind_virq_to_irqhandler() on success, and leaked
+ * the registered sysdev class when xen_pcpu_info_init() failed.
+ */
+static int __init xen_pcpu_init(void)
+{
+       int err;
+
+       if (!xen_initial_domain())
+               return 0;
+
+       err = sysdev_class_register(&xen_pcpu_sysdev_class);
+       if (err) {
+               printk(KERN_WARNING
+                       "xen_pcpu_init: register xen_pcpu sysdev Failed!\n");
+               return err;
+       }
+
+       err = xen_pcpu_info_init();
+       if (err) {
+               sysdev_class_unregister(&xen_pcpu_sysdev_class);
+               return err;
+       }
+
+       /* bind_virq_to_irqhandler returns the bound irq (>= 0) on success */
+       err = bind_virq_to_irqhandler(VIRQ_PCPU_STATE,
+               0, xen_pcpu_interrupt, 0, "pcpu", NULL);
+       if (err < 0) {
+               printk(KERN_WARNING "xen_pcpu_init: "
+                       "Failed to bind pcpu_state virq\n"
+                       "You will lose the latest information!\n");
+               return err;
+       }
+
+       return 0;
+}
+
+subsys_initcall(xen_pcpu_init);
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 6783fce..a9cf4e2 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -312,6 +312,52 @@ struct xenpf_set_processor_pminfo {
 typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);

+#define XENPF_get_cpuinfo 55
+/* One entry of the physical cpu table returned by XENPF_get_cpuinfo */
+struct xen_physical_cpuinfo {
+       /* IN */
+       uint32_t xen_cpuid;
+       /* OUT */
+       uint32_t apic_id;
+       uint32_t acpi_id;
+#define XEN_PCPU_FLAGS_ONLINE  1
+       /* Corresponding xen_cpuid is not present */
+#define XEN_PCPU_FLAGS_INVALID 2
+       uint32_t flags;
+       /* presumably reserved for future extension of the ABI -- confirm */
+       uint8_t pad[128];
+};
+typedef struct xen_physical_cpuinfo xen_physical_cpuinfo_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_physical_cpuinfo);
+
+/* Argument block for XENPF_get_cpuinfo */
+struct xenpf_pcpu_info {
+       /* OUT */
+       /* The maximum cpu_id that is present */
+       uint32_t max_present;
+       /* The maximum possible cpus */
+       uint32_t max_possible;
+
+       /* IN */
+       /* Number of entries the guest provides in 'info' (0 to query sizes) */
+       uint32_t info_num;
+       GUEST_HANDLE(xen_physical_cpuinfo) info;
+};
+typedef struct xenpf_pcpu_info xenpf_pcpu_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpu_info);
+
+/* Payload for XEN_CPU_online / XEN_CPU_offline sub-commands */
+struct xenpf_cpu_ol {
+       uint32_t cpuid;
+};
+
+#define XENPF_resource_hotplug 56
+/* Argument block for XENPF_resource_hotplug */
+struct xenpf_resource_hotplug {
+       uint32_t sub_cmd;
+#define XEN_CPU_online  1
+#define XEN_CPU_offline 2
+       union {
+               struct xenpf_cpu_ol cpu_ol;
+               uint8_t            pad[64];
+       } u;
+};
+
+
 struct xen_platform_op {
        uint32_t cmd;
        uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -327,6 +373,8 @@ struct xen_platform_op {
                struct xenpf_change_freq       change_freq;
                struct xenpf_getidletime       getidletime;
                struct xenpf_set_processor_pminfo set_pminfo;
+               struct xenpf_pcpu_info          pcpu_info;
+               struct xenpf_resource_hotplug   resource;
                uint8_t                        pad[128];
        } u;
 };
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 812ffd5..9ffaee0 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -79,6 +79,7 @@
 #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
 #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
+#define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */

 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
diff --git a/include/xen/pcpu.h b/include/xen/pcpu.h
new file mode 100644
index 0000000..32fcb0d
--- /dev/null
+++ b/include/xen/pcpu.h
@@ -0,0 +1,30 @@
+#ifndef _XEN_PCPU_H
+#define _XEN_PCPU_H
+
+#include <xen/interface/platform.h>
+extern int xen_pcpu_hotplug(int type, uint32_t apic_id);
+/* Events delivered through the xen_pcpu notifier chain */
+#define XEN_PCPU_ONLINE     0x01
+#define XEN_PCPU_OFFLINE    0x02
+#define XEN_PCPU_ADD        0x04
+#define XEN_PCPU_REMOVE     0x08
+
+/* dom0-side record of one physical cpu, linked on xen_pcpus.list */
+struct pcpu {
+       struct list_head pcpu_list;
+       struct sys_device sysdev;
+       uint32_t xen_id;        /* cpu id used by the hypervisor */
+       uint32_t apic_id;
+       uint32_t acpi_id;
+       uint32_t flags;         /* XEN_PCPU_FLAGS_* as last reported */
+/* Internal mark used by xen_pcpu_sync to detect vanished cpus */
+#define PCPU_LOOPED 0x10000000
+       uint32_t status;
+};
+
+/* Whether the given flags word marks the pcpu as online. */
+static inline int xen_pcpu_online(uint32_t flags)
+{
+       return (flags & XEN_PCPU_FLAGS_ONLINE) != 0;
+}
+
+/* Subscribe to / unsubscribe from XEN_PCPU_* events (see defines above) */
+extern int register_xen_pcpu_notifier(struct notifier_block *nb);
+
+extern void unregister_xen_pcpu_notifier(struct notifier_block *nb);
+#endif


xen-devel-bounces@xxxxxxxxxxxxxxxxxxx wrote:
> On 11/07/09 23:46, Keir Fraser wrote:
>>> The Linux patch is initially part of patch for CPU hotplug.
>>>
>> Ah, I see it's part of the phys hotplug stuff. It's okay with me if
>> Jeremy will ack it. Also XENPF_get_cpuinfo should be made
>> extensible, and should get a comment in public/platform.h, e.g.,
>> about what ncpus and max_cpus mean.
>>
>
> The kernel patch looks nice and self-contained, so I'd have no
> problems applying it based on its form.  I'll hold off applying it
> until you've got a final extensible form for the ABI.
>
>    J
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel

Attachment: 01-dom0_pcpu.patch
Description: 01-dom0_pcpu.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.