[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Merge.



# HG changeset patch
# User adsharma@xxxxxxxxxxxxxxxxxxxx
# Node ID 23979fb12c4908a5743b833da8d87e73677c5461
# Parent  6a6c4a422780f0aeb357f2fd8286a36afd3876b8
# Parent  fbdbe4fc218de40d5176e0104908e05fb6e2c6ce
Merge.

diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Wed Aug 17 20:33:56 2005
@@ -44,7 +44,7 @@
 c-obj-$(CONFIG_EFI)            += efi.o efi_stub.o
 c-obj-$(CONFIG_EARLY_PRINTK)   += early_printk.o
 c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
-c-obj-$(CONFIG_SWIOTLB)                += swiotlb.o
+obj-$(CONFIG_SWIOTLB)          += swiotlb.o
 
 EXTRA_AFLAGS   := -traditional
 
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Wed Aug 17 20:33:56 2005
@@ -115,9 +115,6 @@
 EXPORT_SYMBOL(__copy_to_user_ll);
 EXPORT_SYMBOL(strnlen_user);
 
-EXPORT_SYMBOL(dma_alloc_coherent);
-EXPORT_SYMBOL(dma_free_coherent);
-
 #ifdef CONFIG_PCI
 EXPORT_SYMBOL(pci_mem_start);
 #endif
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Wed Aug 17 20:33:56 2005
@@ -24,13 +24,14 @@
        unsigned long   *bitmap;
 };
 
-static void iommu_bug(void)
-{
-       printk(KERN_ALERT "Fatal DMA error! Please use 'swiotlb=force'\n");
-       BUG();
-}
-
-#define IOMMU_BUG_ON(test) do { if (unlikely(test)) iommu_bug(); } while(0)
+#define IOMMU_BUG_ON(test)                             \
+do {                                                   \
+       if (unlikely(test)) {                           \
+               printk(KERN_ALERT "Fatal DMA error! "   \
+                      "Please use 'swiotlb=force'\n"); \
+               BUG();                                  \
+       }                                               \
+} while (0)
 
 int
 dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Wed Aug 17 20:33:56 2005
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/balloon.h>
+#include <linux/module.h>
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
@@ -352,7 +353,6 @@
     balloon_unlock(flags);
 }
 
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
 
 unsigned long allocate_empty_lowmem_region(unsigned long pages)
 {
@@ -401,4 +401,4 @@
     return vstart;
 }
 
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+EXPORT_SYMBOL(allocate_empty_lowmem_region);
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Wed Aug 17 20:33:56 2005
@@ -256,19 +256,23 @@
     char *str;
 
     str = (char *)xenbus_read("control", "shutdown", NULL);
-    /* Ignore read errors and recursive shutdown events. */
-    if (IS_ERR(str) || !strcmp(str, __stringify(SHUTDOWN_INVALID)))
+    /* Ignore read errors. */
+    if (IS_ERR(str))
         return;
-
-    xenbus_printf("control", "shutdown", "%i", SHUTDOWN_INVALID);
-
-    if (strcmp(str, "poweroff") == 0) {
+    if (strlen(str) == 0) {
+        kfree(str);
+        return;
+    }
+
+    xenbus_write("control", "shutdown", "", O_CREAT);
+
+    if (strcmp(str, "poweroff") == 0)
         shutting_down = SHUTDOWN_POWEROFF;
-    } else if (strcmp(str, "reboot") == 0) {
+    else if (strcmp(str, "reboot") == 0)
         shutting_down = SHUTDOWN_REBOOT;
-    } else if (strcmp(str, "suspend") == 0) {
+    else if (strcmp(str, "suspend") == 0)
         shutting_down = SHUTDOWN_SUSPEND;
-    } else {
+    else {
         printk("Ignoring shutdown request: %s\n", str);
         shutting_down = SHUTDOWN_INVALID;
     }
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c     Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c     Wed Aug 17 20:33:56 2005
@@ -5,8 +5,6 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
 #include <linux/etherdevice.h>
@@ -14,34 +12,86 @@
 #include <linux/init.h>
 #include <asm/io.h>
 #include <asm/page.h>
-
-EXPORT_SYMBOL(__dev_alloc_skb);
+#include <asm-xen/hypervisor.h>
 
 /* Referenced in netback.c. */
 /*static*/ kmem_cache_t *skbuff_cachep;
 
-/* Size must be cacheline-aligned (alloc_skb uses SKB_DATA_ALIGN). */
-#define XEN_SKB_SIZE \
-    ((PAGE_SIZE - sizeof(struct skb_shared_info)) & ~(SMP_CACHE_BYTES - 1))
+#define MAX_SKBUFF_ORDER 2
+static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
 
 struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask)
 {
-    struct sk_buff *skb;
-    skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask);
-    if ( likely(skb != NULL) )
-        skb_reserve(skb, 16);
-    return skb;
+       struct sk_buff *skb;
+       int order;
+
+       length = SKB_DATA_ALIGN(length + 16);
+       order = get_order(length + sizeof(struct skb_shared_info));
+       if (order > MAX_SKBUFF_ORDER) {
+               printk(KERN_ALERT "Attempt to allocate order %d skbuff. "
+                      "Increase MAX_SKBUFF_ORDER.\n", order);
+               return NULL;
+       }
+
+       skb = alloc_skb_from_cache(
+               skbuff_order_cachep[order], length, gfp_mask);
+       if (skb != NULL)
+               skb_reserve(skb, 16);
+
+       return skb;
 }
 
 static void skbuff_ctor(void *buf, kmem_cache_t *cachep, unsigned long unused)
 {
-    scrub_pages(buf, 1);
+       int order = 0;
+
+       while (skbuff_order_cachep[order] != cachep)
+               order++;
+
+       if (order != 0)
+               xen_create_contiguous_region((unsigned long)buf, order);
+
+       scrub_pages(buf, 1 << order);
+}
+
+static void skbuff_dtor(void *buf, kmem_cache_t *cachep, unsigned long unused)
+{
+       int order = 0;
+
+       while (skbuff_order_cachep[order] != cachep)
+               order++;
+
+       if (order != 0)
+               xen_destroy_contiguous_region((unsigned long)buf, order);
 }
 
 static int __init skbuff_init(void)
 {
-    skbuff_cachep = kmem_cache_create(
-        "xen-skb", PAGE_SIZE, PAGE_SIZE, 0, skbuff_ctor, NULL);
-    return 0;
+       static char name[MAX_SKBUFF_ORDER + 1][20];
+       unsigned long size;
+       int order;
+
+       for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
+               size = PAGE_SIZE << order;
+               sprintf(name[order], "xen-skb-%lu", size);
+               skbuff_order_cachep[order] = kmem_cache_create(
+                       name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
+       }
+
+       skbuff_cachep = skbuff_order_cachep[0];
+
+       return 0;
 }
 __initcall(skbuff_init);
+
+EXPORT_SYMBOL(__dev_alloc_skb);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c   Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c   Wed Aug 17 20:33:56 2005
@@ -250,7 +250,11 @@
           happen within a race in page table update. In the later
           case just flush. */
 
-       pgd = pgd_offset(current->mm ?: &init_mm, address);
+       /* On Xen the line below does not always work. Needs investigating! */
+       /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
+       pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
+       pgd += pgd_index(address);
+
        pgd_ref = pgd_offset_k(address);
        if (pgd_none(*pgd_ref))
                return -1;
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h    Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h    Wed Aug 17 20:33:56 2005
@@ -1,6 +1,33 @@
-/* Private include for xenbus communications. */
+/*
+ * Private include for xenbus communications.
+ * 
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
 #ifndef _XENBUS_COMMS_H
 #define _XENBUS_COMMS_H
+
 int xs_init(void);
 int xb_init_comms(void);
 void xb_suspend_comms(void);
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Wed Aug 17 20:33:56 2005
@@ -309,6 +309,7 @@
 void xenbus_resume(void)
 {
        xb_init_comms();
+       reregister_xenbus_watches();
        up(&xenbus_lock);
 }
 
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Wed Aug 17 20:33:56 2005
@@ -496,6 +496,18 @@
                       watch->node, err);
 }
 
+/* Re-register callbacks to all watches. */
+void reregister_xenbus_watches(void)
+{
+       struct xenbus_watch *watch;
+       char token[sizeof(watch) * 2 + 1];
+
+       list_for_each_entry(watch, &watches, list) {
+               sprintf(token, "%lX", (long)watch);
+               xs_watch(watch->node, token);
+       }
+}
+
 static int watch_thread(void *unused)
 {
        for (;;) {
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Wed Aug 17 20:33:56 2005
@@ -137,10 +137,8 @@
 void xen_create_contiguous_region(unsigned long vstart, unsigned int order);
 void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
 
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
 /* Allocate a contiguous empty region of low memory. Return virtual start. */
 unsigned long allocate_empty_lowmem_region(unsigned long pages);
-#endif
 
 #include <asm/hypercall.h>
 
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Wed Aug 17 20:33:56 2005
@@ -1,5 +1,3 @@
-#ifndef _ASM_XEN_XENBUS_H
-#define _ASM_XEN_XENBUS_H
 /******************************************************************************
  * xenbus.h
  *
@@ -28,6 +26,10 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
+
+#ifndef _ASM_XEN_XENBUS_H
+#define _ASM_XEN_XENBUS_H
+
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <asm/semaphore.h>
@@ -119,6 +121,7 @@
 
 int register_xenbus_watch(struct xenbus_watch *watch);
 void unregister_xenbus_watch(struct xenbus_watch *watch);
+void reregister_xenbus_watches(void);
 
 /* Called from xen core code. */
 void xenbus_suspend(void);
diff -r 6a6c4a422780 -r 23979fb12c49 tools/examples/network-bridge
--- a/tools/examples/network-bridge     Tue Aug 16 22:27:16 2005
+++ b/tools/examples/network-bridge     Wed Aug 17 20:33:56 2005
@@ -189,7 +189,7 @@
        fi
        ip link set ${netdev} name p${netdev}
        ip link set veth0 name ${netdev}
-       ifconfig p${netdev} -arp down
+       ifconfig p${netdev} 0.0.0.0 -arp down
        ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff
        ifconfig ${netdev} hw ether ${mac}
        add_to_bridge ${bridge} vif0.0
diff -r 6a6c4a422780 -r 23979fb12c49 tools/misc/xend
--- a/tools/misc/xend   Tue Aug 16 22:27:16 2005
+++ b/tools/misc/xend   Wed Aug 17 20:33:56 2005
@@ -117,11 +117,15 @@
        return    
 
 def start_xenstored():
-    s,o = commands.getstatusoutput("/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid");
+    XENSTORED_TRACE = os.getenv("XENSTORED_TRACE")
+    cmd = "/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid"
+    if XENSTORED_TRACE:
+        cmd += " -T /var/log/xenstored-trace.log"
+    s,o = commands.getstatusoutput(cmd)
 
 def start_consoled():
     if os.fork() == 0:
-        os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled']);
+        os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled'])
             
 def main():
     try:
diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Tue Aug 16 22:27:16 2005
+++ b/tools/python/xen/xend/XendDomain.py       Wed Aug 17 20:33:56 2005
@@ -320,8 +320,7 @@
         @param vmconfig: vm configuration
         """
         config = sxp.child_value(vmconfig, 'config')
-        uuid = sxp.child_value(vmconfig, 'uuid')
-        dominfo = XendDomainInfo.restore(self.dbmap, config, uuid=uuid)
+        dominfo = XendDomainInfo.restore(self.dbmap, config)
         return dominfo
 
     def domain_restore(self, src, progress=False):
diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Tue Aug 16 22:27:16 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Aug 17 20:33:56 2005
@@ -195,19 +195,22 @@
 
     recreate = classmethod(recreate)
 
-    def restore(cls, parentdb, config, uuid):
+    def restore(cls, parentdb, config, uuid=None):
         """Create a domain and a VM object to do a restore.
 
         @param parentdb:  parent db
         @param config:    domain configuration
         @param uuid:      uuid to use
         """
+        if not uuid:
+            uuid = getUuid()
         db = parentdb.addChild(uuid)
         vm = cls(db)
         ssidref = int(sxp.child_value(config, 'ssidref'))
         log.debug('restoring with ssidref='+str(ssidref))
         id = xc.domain_create(ssidref = ssidref)
         vm.setdom(id)
+        vm.clear_shutdown()
         try:
             vm.restore = True
             vm.construct(config)
@@ -979,6 +982,11 @@
         if not reason in ['suspend']:
             self.shutdown_pending = {'start':time.time(), 'reason':reason}
 
+    def clear_shutdown(self):
+        db = self.db.addChild("/control")
+        db['shutdown'] = ""
+        db.saveDB(save=True)
+
     def send_sysrq(self, key=0):
         db = self.db.addChild("/control");
         db['sysrq'] = '%c' % key;
diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Tue Aug 16 22:27:16 2005
+++ b/tools/python/xen/xm/create.py     Wed Aug 17 20:33:56 2005
@@ -380,7 +380,6 @@
 
     @return: MAC address string
     """
-    random.seed()
     mac = [ 0xaa, 0x00, 0x00,
             random.randint(0x00, 0x7f),
             random.randint(0x00, 0xff),
@@ -689,6 +688,7 @@
     del xc
 
 def main(argv):
+    random.seed()
     opts = gopts
     args = opts.parse(argv)
     if opts.vals.help:
diff -r 6a6c4a422780 -r 23979fb12c49 tools/xenstore/xenstored.h
--- a/tools/xenstore/xenstored.h        Tue Aug 16 22:27:16 2005
+++ b/tools/xenstore/xenstored.h        Wed Aug 17 20:33:56 2005
@@ -1,21 +1,29 @@
-/* 
-    Simple prototyle Xen Store Daemon providing simple tree-like database.
-    Copyright (C) 2005 Rusty Russell IBM Corporation
+/*
+ * Simple prototyle Xen Store Daemon providing simple tree-like database.
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
 #ifndef _XENSTORED_H
 #define _XENSTORED_H
 
diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/ia64/xenmisc.c
--- a/xen/arch/ia64/xenmisc.c   Tue Aug 16 22:27:16 2005
+++ b/xen/arch/ia64/xenmisc.c   Wed Aug 17 20:33:56 2005
@@ -280,7 +280,6 @@
 
 unsigned long context_switch_count = 0;
 
-// context_switch
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
 //printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
@@ -290,22 +289,14 @@
 //if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
 //printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
 #ifdef CONFIG_VTI
-       unsigned long psr;
-       /* Interrupt is enabled after next task is chosen.
-        * So we have to disable it for stack switch.
-        */
-       local_irq_save(psr);
        vtm_domain_out(prev);
-       /* Housekeeping for prev domain */
-#endif // CONFIG_VTI
-
+#endif
        context_switch_count++;
        switch_to(prev,next,prev);
 #ifdef CONFIG_VTI
-       /* Post-setup for new domain */
         vtm_domain_in(current);
-       local_irq_restore(psr);
-#endif // CONFIG_VTI
+#endif
+
 // leave this debug for now: it acts as a heartbeat when more than
 // one domain is active
 {
@@ -315,25 +306,27 @@
 if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
 if (!i--) { printk("+",id); i = 1000000; }
 }
-       clear_bit(_VCPUF_running, &prev->vcpu_flags);
-       //if (!is_idle_task(next->domain) )
-               //send_guest_virq(next, VIRQ_TIMER);
+
 #ifdef CONFIG_VTI
        if (VMX_DOMAIN(current))
                vmx_load_all_rr(current);
-       return;
-#else // CONFIG_VTI
+#else
        if (!is_idle_task(current->domain)) {
                load_region_regs(current);
                if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
        }
        if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
-#endif // CONFIG_VTI
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+       /* nothing to do */
 }
 
 void continue_running(struct vcpu *same)
 {
-    /* nothing to do */
+       /* nothing to do */
 }
 
 void panic_domain(struct pt_regs *regs, const char *fmt, ...)
diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Tue Aug 16 22:27:16 2005
+++ b/xen/arch/x86/domain.c     Wed Aug 17 20:33:56 2005
@@ -48,6 +48,8 @@
 
 struct percpu_ctxt {
     struct vcpu *curr_vcpu;
+    unsigned int context_not_finalised;
+    unsigned int dirty_segment_mask;
 } __cacheline_aligned;
 static struct percpu_ctxt percpu_ctxt[NR_CPUS];
 
@@ -541,51 +543,59 @@
     __r; })
 
 #if CONFIG_VMX
-#define load_msrs(_p, _n)     if (vmx_switch_on) vmx_load_msrs((_p), (_n))
+#define load_msrs(n)     if (vmx_switch_on) vmx_load_msrs(n)
 #else
-#define load_msrs(_p, _n)     ((void)0)
+#define load_msrs(n)     ((void)0)
 #endif 
 
-static void load_segments(struct vcpu *p, struct vcpu *n)
-{
-    struct vcpu_guest_context *pctxt = &p->arch.guest_context;
+/*
+ * save_segments() writes a mask of segments which are dirty (non-zero),
+ * allowing load_segments() to avoid some expensive segment loads and
+ * MSR writes.
+ */
+#define DIRTY_DS           0x01
+#define DIRTY_ES           0x02
+#define DIRTY_FS           0x04
+#define DIRTY_GS           0x08
+#define DIRTY_FS_BASE      0x10
+#define DIRTY_GS_BASE_USER 0x20
+
+static void load_segments(struct vcpu *n)
+{
     struct vcpu_guest_context *nctxt = &n->arch.guest_context;
     int all_segs_okay = 1;
+    unsigned int dirty_segment_mask, cpu = smp_processor_id();
+
+    /* Load and clear the dirty segment mask. */
+    dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
+    percpu_ctxt[cpu].dirty_segment_mask = 0;
 
     /* Either selector != 0 ==> reload. */
-    if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+    if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
         all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
 
     /* Either selector != 0 ==> reload. */
-    if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+    if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
         all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
 
     /*
      * Either selector != 0 ==> reload.
      * Also reload to reset FS_BASE if it was non-zero.
      */
-    if ( unlikely(pctxt->user_regs.fs |
-                  pctxt->fs_base |
+    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
                   nctxt->user_regs.fs) )
-    {
         all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
-        if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
-            pctxt->fs_base = 0;
-    }
 
     /*
      * Either selector != 0 ==> reload.
      * Also reload to reset GS_BASE if it was non-zero.
      */
-    if ( unlikely(pctxt->user_regs.gs |
-                  pctxt->gs_base_user |
+    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
                   nctxt->user_regs.gs) )
     {
         /* Reset GS_BASE with user %gs? */
-        if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+        if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
             all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
-        if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
-            pctxt->gs_base_user = 0;
     }
 
     /* This can only be non-zero if selector is NULL. */
@@ -650,7 +660,9 @@
 
 static void save_segments(struct vcpu *v)
 {
-    struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
+    struct vcpu_guest_context *ctxt = &v->arch.guest_context;
+    struct cpu_user_regs      *regs = &ctxt->user_regs;
+    unsigned int dirty_segment_mask = 0;
 
     if ( VMX_DOMAIN(v) )
         rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
@@ -659,18 +671,34 @@
     __asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
     __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
     __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
-}
-
-static void clear_segments(void)
-{
-    __asm__ __volatile__ (
-        " movl %0,%%ds; "
-        " movl %0,%%es; "
-        " movl %0,%%fs; "
-        " movl %0,%%gs; "
-        ""safe_swapgs"  "
-        " movl %0,%%gs"
-        : : "r" (0) );
+
+    if ( regs->ds )
+        dirty_segment_mask |= DIRTY_DS;
+
+    if ( regs->es )
+        dirty_segment_mask |= DIRTY_ES;
+
+    if ( regs->fs )
+    {
+        dirty_segment_mask |= DIRTY_FS;
+        ctxt->fs_base = 0; /* != 0 selector kills fs_base */
+    }
+    else if ( ctxt->fs_base )
+    {
+        dirty_segment_mask |= DIRTY_FS_BASE;
+    }
+
+    if ( regs->gs )
+    {
+        dirty_segment_mask |= DIRTY_GS;
+        ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
+    }
+    else if ( ctxt->gs_base_user )
+    {
+        dirty_segment_mask |= DIRTY_GS_BASE_USER;
+    }
+
+    percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
 }
 
 long do_switch_to_user(void)
@@ -706,10 +734,9 @@
 
 #elif defined(__i386__)
 
-#define load_segments(_p, _n) ((void)0)
-#define load_msrs(_p, _n)     ((void)0)
-#define save_segments(_p)     ((void)0)
-#define clear_segments()      ((void)0)
+#define load_segments(n) ((void)0)
+#define load_msrs(n)     ((void)0)
+#define save_segments(p) ((void)0)
 
 static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
 {
@@ -726,9 +753,9 @@
 static void __context_switch(void)
 {
     struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
-    unsigned int         cpu = smp_processor_id();
-    struct vcpu  *p = percpu_ctxt[cpu].curr_vcpu;
-    struct vcpu  *n = current;
+    unsigned int          cpu = smp_processor_id();
+    struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
+    struct vcpu          *n = current;
 
     if ( !is_idle_task(p->domain) )
     {
@@ -786,23 +813,31 @@
 
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
-    struct vcpu *realprev;
-
-    local_irq_disable();
+    unsigned int cpu = smp_processor_id();
+
+    ASSERT(!local_irq_is_enabled());
 
     set_current(next);
 
-    if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) || 
-         is_idle_task(next->domain) )
-    {
-        local_irq_enable();
-    }
-    else
+    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
     {
         __context_switch();
-
-        local_irq_enable();
-        
+        percpu_ctxt[cpu].context_not_finalised = 1;
+    }
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+    unsigned int cpu = smp_processor_id();
+
+    ASSERT(local_irq_is_enabled());
+
+    if ( percpu_ctxt[cpu].context_not_finalised )
+    {
+        percpu_ctxt[cpu].context_not_finalised = 0;
+
+        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+
         if ( VMX_DOMAIN(next) )
         {
             vmx_restore_msrs(next);
@@ -810,18 +845,10 @@
         else
         {
             load_LDT(next);
-            load_segments(realprev, next);
-            load_msrs(realprev, next);
-        }
-    }
-
-    /*
-     * We do this late on because it doesn't need to be protected by the
-     * schedule_lock, and because we want this to be the very last use of
-     * 'prev' (after this point, a dying domain's info structure may be freed
-     * without warning). 
-     */
-    clear_bit(_VCPUF_running, &prev->vcpu_flags);
+            load_segments(next);
+            load_msrs(next);
+        }
+    }
 
     schedule_tail(next);
     BUG();
@@ -835,12 +862,19 @@
 
 int __sync_lazy_execstate(void)
 {
-    if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
-        return 0;
-    __context_switch();
-    load_LDT(current);
-    clear_segments();
-    return 1;
+    unsigned long flags;
+    int switch_required;
+
+    local_irq_save(flags);
+
+    switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
+
+    if ( switch_required )
+        __context_switch();
+
+    local_irq_restore(flags);
+
+    return switch_required;
 }
 
 void sync_lazy_execstate_cpu(unsigned int cpu)
diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Tue Aug 16 22:27:16 2005
+++ b/xen/arch/x86/vmx.c        Wed Aug 17 20:33:56 2005
@@ -65,7 +65,7 @@
  * are not modified once set for generic domains, we don't save them, 
  * but simply reset them to the values set at percpu_traps_init().
  */
-void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
+void vmx_load_msrs(struct vcpu *n)
 {
     struct msr_state *host_state;
     host_state = &percpu_msr[smp_processor_id()];
diff -r 6a6c4a422780 -r 23979fb12c49 xen/common/schedule.c
--- a/xen/common/schedule.c     Tue Aug 16 22:27:16 2005
+++ b/xen/common/schedule.c     Wed Aug 17 20:33:56 2005
@@ -474,13 +474,14 @@
 
     set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
 
-    /* Must be protected by the schedule_lock! */
+    if ( unlikely(prev == next) )
+    {
+        spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+        return continue_running(prev);
+    }
+
+    clear_bit(_VCPUF_running, &prev->vcpu_flags);
     set_bit(_VCPUF_running, &next->vcpu_flags);
-
-    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
-    if ( unlikely(prev == next) )
-        return continue_running(prev);
 
     perfc_incrc(sched_ctx);
 
@@ -517,6 +518,10 @@
              next->domain->domain_id, next->vcpu_id);
 
     context_switch(prev, next);
+
+    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+    context_switch_finalise(next);
 }
 
 /* No locking needed -- pointer comparison is safe :-) */
diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/asm-x86/e820.h
--- a/xen/include/asm-x86/e820.h        Tue Aug 16 22:27:16 2005
+++ b/xen/include/asm-x86/e820.h        Wed Aug 17 20:33:56 2005
@@ -3,7 +3,7 @@
 
 #include <asm/page.h>
 
-#define E820MAX        32
+#define E820MAX        128
 
 #define E820_RAM          1
 #define E820_RESERVED     2
diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h    Tue Aug 16 22:27:16 2005
+++ b/xen/include/asm-x86/vmx_vmcs.h    Wed Aug 17 20:33:56 2005
@@ -28,10 +28,10 @@
 extern void stop_vmx(void);
 
 #if defined (__x86_64__)
-extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
+extern void vmx_load_msrs(struct vcpu *n);
 void vmx_restore_msrs(struct vcpu *d);
 #else
-#define vmx_load_msrs(_p, _n)      ((void)0)
+#define vmx_load_msrs(_n)          ((void)0)
 #define vmx_restore_msrs(_v)       ((void)0)
 #endif
 
diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Tue Aug 16 22:27:16 2005
+++ b/xen/include/xen/sched.h   Wed Aug 17 20:33:56 2005
@@ -258,12 +258,32 @@
 extern void sync_lazy_execstate_all(void);
 extern int __sync_lazy_execstate(void);
 
-/* Called by the scheduler to switch to another vcpu. */
+/*
+ * Called by the scheduler to switch to another VCPU. On entry, although
+ * VCPUF_running is no longer asserted for @prev, its context is still running
+ * on the local CPU and is not committed to memory. The local scheduler lock
+ * is therefore still held, and interrupts are disabled, because the local CPU
+ * is in an inconsistent state.
+ * 
+ * The callee must ensure that the local CPU is no longer running in @prev's
+ * context, and that the context is saved to memory, before returning.
+ * Alternatively, if implementing lazy context switching, it suffices to ensure
+ * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
+ */
 extern void context_switch(
     struct vcpu *prev, 
     struct vcpu *next);
 
-/* Called by the scheduler to continue running the current vcpu. */
+/*
+ * On some architectures (notably x86) it is not possible to entirely load
+ * @next's context with interrupts disabled. These may implement a function to
+ * finalise loading the new context after interrupts are re-enabled. This
+ * function is not given @prev and is not permitted to access it.
+ */
+extern void context_switch_finalise(
+    struct vcpu *next);
+
+/* Called by the scheduler to continue running the current VCPU. */
 extern void continue_running(
     struct vcpu *same);
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.