[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merge.
# HG changeset patch # User adsharma@xxxxxxxxxxxxxxxxxxxx # Node ID 23979fb12c4908a5743b833da8d87e73677c5461 # Parent 6a6c4a422780f0aeb357f2fd8286a36afd3876b8 # Parent fbdbe4fc218de40d5176e0104908e05fb6e2c6ce Merge. diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Wed Aug 17 20:33:56 2005 @@ -44,7 +44,7 @@ c-obj-$(CONFIG_EFI) += efi.o efi_stub.o c-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o -c-obj-$(CONFIG_SWIOTLB) += swiotlb.o +obj-$(CONFIG_SWIOTLB) += swiotlb.o EXTRA_AFLAGS := -traditional diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Wed Aug 17 20:33:56 2005 @@ -115,9 +115,6 @@ EXPORT_SYMBOL(__copy_to_user_ll); EXPORT_SYMBOL(strnlen_user); -EXPORT_SYMBOL(dma_alloc_coherent); -EXPORT_SYMBOL(dma_free_coherent); - #ifdef CONFIG_PCI EXPORT_SYMBOL(pci_mem_start); #endif diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Wed Aug 17 20:33:56 2005 @@ -24,13 +24,14 @@ unsigned long *bitmap; }; -static void iommu_bug(void) -{ - printk(KERN_ALERT "Fatal DMA error! Please use 'swiotlb=force'\n"); - BUG(); -} - -#define IOMMU_BUG_ON(test) do { if (unlikely(test)) iommu_bug(); } while(0) +#define IOMMU_BUG_ON(test) \ +do { \ + if (unlikely(test)) { \ + printk(KERN_ALERT "Fatal DMA error! 
" \ + "Please use 'swiotlb=force'\n"); \ + BUG(); \ + } \ +} while (0) int dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Wed Aug 17 20:33:56 2005 @@ -35,6 +35,7 @@ #include <asm/pgtable.h> #include <asm-xen/hypervisor.h> #include <asm-xen/balloon.h> +#include <linux/module.h> #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) #include <linux/percpu.h> #include <asm/tlbflush.h> @@ -352,7 +353,6 @@ balloon_unlock(flags); } -#ifdef CONFIG_XEN_PHYSDEV_ACCESS unsigned long allocate_empty_lowmem_region(unsigned long pages) { @@ -401,4 +401,4 @@ return vstart; } -#endif /* CONFIG_XEN_PHYSDEV_ACCESS */ +EXPORT_SYMBOL(allocate_empty_lowmem_region); diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Wed Aug 17 20:33:56 2005 @@ -256,19 +256,23 @@ char *str; str = (char *)xenbus_read("control", "shutdown", NULL); - /* Ignore read errors and recursive shutdown events. */ - if (IS_ERR(str) || !strcmp(str, __stringify(SHUTDOWN_INVALID))) + /* Ignore read errors. 
*/ + if (IS_ERR(str)) return; - - xenbus_printf("control", "shutdown", "%i", SHUTDOWN_INVALID); - - if (strcmp(str, "poweroff") == 0) { + if (strlen(str) == 0) { + kfree(str); + return; + } + + xenbus_write("control", "shutdown", "", O_CREAT); + + if (strcmp(str, "poweroff") == 0) shutting_down = SHUTDOWN_POWEROFF; - } else if (strcmp(str, "reboot") == 0) { + else if (strcmp(str, "reboot") == 0) shutting_down = SHUTDOWN_REBOOT; - } else if (strcmp(str, "suspend") == 0) { + else if (strcmp(str, "suspend") == 0) shutting_down = SHUTDOWN_SUSPEND; - } else { + else { printk("Ignoring shutdown request: %s\n", str); shutting_down = SHUTDOWN_INVALID; } diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c Wed Aug 17 20:33:56 2005 @@ -5,8 +5,6 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/slab.h> -#include <linux/string.h> -#include <linux/errno.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> @@ -14,34 +12,86 @@ #include <linux/init.h> #include <asm/io.h> #include <asm/page.h> - -EXPORT_SYMBOL(__dev_alloc_skb); +#include <asm-xen/hypervisor.h> /* Referenced in netback.c. */ /*static*/ kmem_cache_t *skbuff_cachep; -/* Size must be cacheline-aligned (alloc_skb uses SKB_DATA_ALIGN). 
*/ -#define XEN_SKB_SIZE \ - ((PAGE_SIZE - sizeof(struct skb_shared_info)) & ~(SMP_CACHE_BYTES - 1)) +#define MAX_SKBUFF_ORDER 2 +static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1]; struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask) { - struct sk_buff *skb; - skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask); - if ( likely(skb != NULL) ) - skb_reserve(skb, 16); - return skb; + struct sk_buff *skb; + int order; + + length = SKB_DATA_ALIGN(length + 16); + order = get_order(length + sizeof(struct skb_shared_info)); + if (order > MAX_SKBUFF_ORDER) { + printk(KERN_ALERT "Attempt to allocate order %d skbuff. " + "Increase MAX_SKBUFF_ORDER.\n", order); + return NULL; + } + + skb = alloc_skb_from_cache( + skbuff_order_cachep[order], length, gfp_mask); + if (skb != NULL) + skb_reserve(skb, 16); + + return skb; } static void skbuff_ctor(void *buf, kmem_cache_t *cachep, unsigned long unused) { - scrub_pages(buf, 1); + int order = 0; + + while (skbuff_order_cachep[order] != cachep) + order++; + + if (order != 0) + xen_create_contiguous_region((unsigned long)buf, order); + + scrub_pages(buf, 1 << order); +} + +static void skbuff_dtor(void *buf, kmem_cache_t *cachep, unsigned long unused) +{ + int order = 0; + + while (skbuff_order_cachep[order] != cachep) + order++; + + if (order != 0) + xen_destroy_contiguous_region((unsigned long)buf, order); } static int __init skbuff_init(void) { - skbuff_cachep = kmem_cache_create( - "xen-skb", PAGE_SIZE, PAGE_SIZE, 0, skbuff_ctor, NULL); - return 0; + static char name[MAX_SKBUFF_ORDER + 1][20]; + unsigned long size; + int order; + + for (order = 0; order <= MAX_SKBUFF_ORDER; order++) { + size = PAGE_SIZE << order; + sprintf(name[order], "xen-skb-%lu", size); + skbuff_order_cachep[order] = kmem_cache_create( + name[order], size, size, 0, skbuff_ctor, skbuff_dtor); + } + + skbuff_cachep = skbuff_order_cachep[0]; + + return 0; } __initcall(skbuff_init); + +EXPORT_SYMBOL(__dev_alloc_skb); + +/* + * 
Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c Wed Aug 17 20:33:56 2005 @@ -250,7 +250,11 @@ happen within a race in page table update. In the later case just flush. */ - pgd = pgd_offset(current->mm ?: &init_mm, address); + /* On Xen the line below does not always work. Needs investigating! */ + /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/ + pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id()); + pgd += pgd_index(address); + pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Wed Aug 17 20:33:56 2005 @@ -1,6 +1,33 @@ -/* Private include for xenbus communications. */ +/* + * Private include for xenbus communications. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + #ifndef _XENBUS_COMMS_H #define _XENBUS_COMMS_H + int xs_init(void); int xb_init_comms(void); void xb_suspend_comms(void); diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Wed Aug 17 20:33:56 2005 @@ -309,6 +309,7 @@ void xenbus_resume(void) { xb_init_comms(); + reregister_xenbus_watches(); up(&xenbus_lock); } diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Wed Aug 17 20:33:56 2005 @@ -496,6 +496,18 @@ watch->node, err); } +/* Re-register callbacks to all watches. 
*/ +void reregister_xenbus_watches(void) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + list_for_each_entry(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); + } +} + static int watch_thread(void *unused) { for (;;) { diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Wed Aug 17 20:33:56 2005 @@ -137,10 +137,8 @@ void xen_create_contiguous_region(unsigned long vstart, unsigned int order); void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); -#ifdef CONFIG_XEN_PHYSDEV_ACCESS /* Allocate a contiguous empty region of low memory. Return virtual start. */ unsigned long allocate_empty_lowmem_region(unsigned long pages); -#endif #include <asm/hypercall.h> diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/include/asm-xen/xenbus.h --- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Tue Aug 16 22:27:16 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Wed Aug 17 20:33:56 2005 @@ -1,5 +1,3 @@ -#ifndef _ASM_XEN_XENBUS_H -#define _ASM_XEN_XENBUS_H /****************************************************************************** * xenbus.h * @@ -28,6 +26,10 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ + +#ifndef _ASM_XEN_XENBUS_H +#define _ASM_XEN_XENBUS_H + #include <linux/device.h> #include <linux/notifier.h> #include <asm/semaphore.h> @@ -119,6 +121,7 @@ int register_xenbus_watch(struct xenbus_watch *watch); void unregister_xenbus_watch(struct xenbus_watch *watch); +void reregister_xenbus_watches(void); /* Called from xen core code. 
*/ void xenbus_suspend(void); diff -r 6a6c4a422780 -r 23979fb12c49 tools/examples/network-bridge --- a/tools/examples/network-bridge Tue Aug 16 22:27:16 2005 +++ b/tools/examples/network-bridge Wed Aug 17 20:33:56 2005 @@ -189,7 +189,7 @@ fi ip link set ${netdev} name p${netdev} ip link set veth0 name ${netdev} - ifconfig p${netdev} -arp down + ifconfig p${netdev} 0.0.0.0 -arp down ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff ifconfig ${netdev} hw ether ${mac} add_to_bridge ${bridge} vif0.0 diff -r 6a6c4a422780 -r 23979fb12c49 tools/misc/xend --- a/tools/misc/xend Tue Aug 16 22:27:16 2005 +++ b/tools/misc/xend Wed Aug 17 20:33:56 2005 @@ -117,11 +117,15 @@ return def start_xenstored(): - s,o = commands.getstatusoutput("/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid"); + XENSTORED_TRACE = os.getenv("XENSTORED_TRACE") + cmd = "/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid" + if XENSTORED_TRACE: + cmd += " -T /var/log/xenstored-trace.log" + s,o = commands.getstatusoutput(cmd) def start_consoled(): if os.fork() == 0: - os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled']); + os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled']) def main(): try: diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Tue Aug 16 22:27:16 2005 +++ b/tools/python/xen/xend/XendDomain.py Wed Aug 17 20:33:56 2005 @@ -320,8 +320,7 @@ @param vmconfig: vm configuration """ config = sxp.child_value(vmconfig, 'config') - uuid = sxp.child_value(vmconfig, 'uuid') - dominfo = XendDomainInfo.restore(self.dbmap, config, uuid=uuid) + dominfo = XendDomainInfo.restore(self.dbmap, config) return dominfo def domain_restore(self, src, progress=False): diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue Aug 16 22:27:16 2005 +++ b/tools/python/xen/xend/XendDomainInfo.py Wed Aug 17 20:33:56 2005 @@ -195,19 +195,22 @@ recreate = classmethod(recreate) 
- def restore(cls, parentdb, config, uuid): + def restore(cls, parentdb, config, uuid=None): """Create a domain and a VM object to do a restore. @param parentdb: parent db @param config: domain configuration @param uuid: uuid to use """ + if not uuid: + uuid = getUuid() db = parentdb.addChild(uuid) vm = cls(db) ssidref = int(sxp.child_value(config, 'ssidref')) log.debug('restoring with ssidref='+str(ssidref)) id = xc.domain_create(ssidref = ssidref) vm.setdom(id) + vm.clear_shutdown() try: vm.restore = True vm.construct(config) @@ -979,6 +982,11 @@ if not reason in ['suspend']: self.shutdown_pending = {'start':time.time(), 'reason':reason} + def clear_shutdown(self): + db = self.db.addChild("/control") + db['shutdown'] = "" + db.saveDB(save=True) + def send_sysrq(self, key=0): db = self.db.addChild("/control"); db['sysrq'] = '%c' % key; diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Tue Aug 16 22:27:16 2005 +++ b/tools/python/xen/xm/create.py Wed Aug 17 20:33:56 2005 @@ -380,7 +380,6 @@ @return: MAC address string """ - random.seed() mac = [ 0xaa, 0x00, 0x00, random.randint(0x00, 0x7f), random.randint(0x00, 0xff), @@ -689,6 +688,7 @@ del xc def main(argv): + random.seed() opts = gopts args = opts.parse(argv) if opts.vals.help: diff -r 6a6c4a422780 -r 23979fb12c49 tools/xenstore/xenstored.h --- a/tools/xenstore/xenstored.h Tue Aug 16 22:27:16 2005 +++ b/tools/xenstore/xenstored.h Wed Aug 17 20:33:56 2005 @@ -1,21 +1,29 @@ -/* - Simple prototyle Xen Store Daemon providing simple tree-like database. - Copyright (C) 2005 Rusty Russell IBM Corporation +/* + * Simple prototype Xen Store Daemon providing simple tree-like database. 
+ * Copyright (C) 2005 Rusty Russell IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ #ifndef _XENSTORED_H #define _XENSTORED_H diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/ia64/xenmisc.c --- a/xen/arch/ia64/xenmisc.c Tue Aug 16 22:27:16 2005 +++ b/xen/arch/ia64/xenmisc.c Wed Aug 17 20:33:56 2005 @@ -280,7 +280,6 @@ unsigned long context_switch_count = 0; -// context_switch void context_switch(struct vcpu *prev, struct vcpu *next) { //printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); @@ -290,22 +289,14 @@ //if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo(); //printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id); #ifdef CONFIG_VTI - unsigned long psr; - /* Interrupt is enabled after next task is chosen. - * So we have to disable it for stack switch. - */ - local_irq_save(psr); vtm_domain_out(prev); - /* Housekeeping for prev domain */ -#endif // CONFIG_VTI - +#endif context_switch_count++; switch_to(prev,next,prev); #ifdef CONFIG_VTI - /* Post-setup for new domain */ vtm_domain_in(current); - local_irq_restore(psr); -#endif // CONFIG_VTI +#endif + // leave this debug for now: it acts as a heartbeat when more than // one domain is active { @@ -315,25 +306,27 @@ if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; } if (!i--) { printk("+",id); i = 1000000; } } - clear_bit(_VCPUF_running, &prev->vcpu_flags); - //if (!is_idle_task(next->domain) ) - //send_guest_virq(next, VIRQ_TIMER); + #ifdef CONFIG_VTI if (VMX_DOMAIN(current)) vmx_load_all_rr(current); - return; -#else // CONFIG_VTI +#else if (!is_idle_task(current->domain)) { load_region_regs(current); if (vcpu_timer_expired(current)) vcpu_pend_timer(current); } if (vcpu_timer_expired(current)) vcpu_pend_timer(current); -#endif // CONFIG_VTI +#endif +} + +void context_switch_finalise(struct vcpu *next) +{ + /* nothing to do */ } void 
continue_running(struct vcpu *same) { - /* nothing to do */ + /* nothing to do */ } void panic_domain(struct pt_regs *regs, const char *fmt, ...) diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Tue Aug 16 22:27:16 2005 +++ b/xen/arch/x86/domain.c Wed Aug 17 20:33:56 2005 @@ -48,6 +48,8 @@ struct percpu_ctxt { struct vcpu *curr_vcpu; + unsigned int context_not_finalised; + unsigned int dirty_segment_mask; } __cacheline_aligned; static struct percpu_ctxt percpu_ctxt[NR_CPUS]; @@ -541,51 +543,59 @@ __r; }) #if CONFIG_VMX -#define load_msrs(_p, _n) if (vmx_switch_on) vmx_load_msrs((_p), (_n)) +#define load_msrs(n) if (vmx_switch_on) vmx_load_msrs(n) #else -#define load_msrs(_p, _n) ((void)0) +#define load_msrs(n) ((void)0) #endif -static void load_segments(struct vcpu *p, struct vcpu *n) -{ - struct vcpu_guest_context *pctxt = &p->arch.guest_context; +/* + * save_segments() writes a mask of segments which are dirty (non-zero), + * allowing load_segments() to avoid some expensive segment loads and + * MSR writes. + */ +#define DIRTY_DS 0x01 +#define DIRTY_ES 0x02 +#define DIRTY_FS 0x04 +#define DIRTY_GS 0x08 +#define DIRTY_FS_BASE 0x10 +#define DIRTY_GS_BASE_USER 0x20 + +static void load_segments(struct vcpu *n) +{ struct vcpu_guest_context *nctxt = &n->arch.guest_context; int all_segs_okay = 1; + unsigned int dirty_segment_mask, cpu = smp_processor_id(); + + /* Load and clear the dirty segment mask. */ + dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask; + percpu_ctxt[cpu].dirty_segment_mask = 0; /* Either selector != 0 ==> reload. */ - if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) ) + if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) ) all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds); /* Either selector != 0 ==> reload. 
*/ - if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) ) + if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) ) all_segs_okay &= loadsegment(es, nctxt->user_regs.es); /* * Either selector != 0 ==> reload. * Also reload to reset FS_BASE if it was non-zero. */ - if ( unlikely(pctxt->user_regs.fs | - pctxt->fs_base | + if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) | nctxt->user_regs.fs) ) - { all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs); - if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */ - pctxt->fs_base = 0; - } /* * Either selector != 0 ==> reload. * Also reload to reset GS_BASE if it was non-zero. */ - if ( unlikely(pctxt->user_regs.gs | - pctxt->gs_base_user | + if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) | nctxt->user_regs.gs) ) { /* Reset GS_BASE with user %gs? */ - if ( pctxt->user_regs.gs || !nctxt->gs_base_user ) + if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user ) all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs); - if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */ - pctxt->gs_base_user = 0; } /* This can only be non-zero if selector is NULL. 
*/ @@ -650,7 +660,9 @@ static void save_segments(struct vcpu *v) { - struct cpu_user_regs *regs = &v->arch.guest_context.user_regs; + struct vcpu_guest_context *ctxt = &v->arch.guest_context; + struct cpu_user_regs *regs = &ctxt->user_regs; + unsigned int dirty_segment_mask = 0; if ( VMX_DOMAIN(v) ) rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs); @@ -659,18 +671,34 @@ __asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) ); __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) ); __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) ); -} - -static void clear_segments(void) -{ - __asm__ __volatile__ ( - " movl %0,%%ds; " - " movl %0,%%es; " - " movl %0,%%fs; " - " movl %0,%%gs; " - ""safe_swapgs" " - " movl %0,%%gs" - : : "r" (0) ); + + if ( regs->ds ) + dirty_segment_mask |= DIRTY_DS; + + if ( regs->es ) + dirty_segment_mask |= DIRTY_ES; + + if ( regs->fs ) + { + dirty_segment_mask |= DIRTY_FS; + ctxt->fs_base = 0; /* != 0 selector kills fs_base */ + } + else if ( ctxt->fs_base ) + { + dirty_segment_mask |= DIRTY_FS_BASE; + } + + if ( regs->gs ) + { + dirty_segment_mask |= DIRTY_GS; + ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */ + } + else if ( ctxt->gs_base_user ) + { + dirty_segment_mask |= DIRTY_GS_BASE_USER; + } + + percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask; } long do_switch_to_user(void) @@ -706,10 +734,9 @@ #elif defined(__i386__) -#define load_segments(_p, _n) ((void)0) -#define load_msrs(_p, _n) ((void)0) -#define save_segments(_p) ((void)0) -#define clear_segments() ((void)0) +#define load_segments(n) ((void)0) +#define load_msrs(n) ((void)0) +#define save_segments(p) ((void)0) static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu) { @@ -726,9 +753,9 @@ static void __context_switch(void) { struct cpu_user_regs *stack_regs = guest_cpu_user_regs(); - unsigned int cpu = smp_processor_id(); - struct vcpu *p = percpu_ctxt[cpu].curr_vcpu; - struct vcpu *n = current; + 
unsigned int cpu = smp_processor_id(); + struct vcpu *p = percpu_ctxt[cpu].curr_vcpu; + struct vcpu *n = current; if ( !is_idle_task(p->domain) ) { @@ -786,23 +813,31 @@ void context_switch(struct vcpu *prev, struct vcpu *next) { - struct vcpu *realprev; - - local_irq_disable(); + unsigned int cpu = smp_processor_id(); + + ASSERT(!local_irq_is_enabled()); set_current(next); - if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) || - is_idle_task(next->domain) ) - { - local_irq_enable(); - } - else + if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) ) { __context_switch(); - - local_irq_enable(); - + percpu_ctxt[cpu].context_not_finalised = 1; + } +} + +void context_switch_finalise(struct vcpu *next) +{ + unsigned int cpu = smp_processor_id(); + + ASSERT(local_irq_is_enabled()); + + if ( percpu_ctxt[cpu].context_not_finalised ) + { + percpu_ctxt[cpu].context_not_finalised = 0; + + BUG_ON(percpu_ctxt[cpu].curr_vcpu != next); + if ( VMX_DOMAIN(next) ) { vmx_restore_msrs(next); @@ -810,18 +845,10 @@ else { load_LDT(next); - load_segments(realprev, next); - load_msrs(realprev, next); - } - } - - /* - * We do this late on because it doesn't need to be protected by the - * schedule_lock, and because we want this to be the very last use of - * 'prev' (after this point, a dying domain's info structure may be freed - * without warning). 
- */ - clear_bit(_VCPUF_running, &prev->vcpu_flags); + load_segments(next); + load_msrs(next); + } + } schedule_tail(next); BUG(); @@ -835,12 +862,19 @@ int __sync_lazy_execstate(void) { - if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current ) - return 0; - __context_switch(); - load_LDT(current); - clear_segments(); - return 1; + unsigned long flags; + int switch_required; + + local_irq_save(flags); + + switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current); + + if ( switch_required ) + __context_switch(); + + local_irq_restore(flags); + + return switch_required; } void sync_lazy_execstate_cpu(unsigned int cpu) diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/x86/vmx.c --- a/xen/arch/x86/vmx.c Tue Aug 16 22:27:16 2005 +++ b/xen/arch/x86/vmx.c Wed Aug 17 20:33:56 2005 @@ -65,7 +65,7 @@ * are not modified once set for generic domains, we don't save them, * but simply reset them to the values set at percpu_traps_init(). */ -void vmx_load_msrs(struct vcpu *p, struct vcpu *n) +void vmx_load_msrs(struct vcpu *n) { struct msr_state *host_state; host_state = &percpu_msr[smp_processor_id()]; diff -r 6a6c4a422780 -r 23979fb12c49 xen/common/schedule.c --- a/xen/common/schedule.c Tue Aug 16 22:27:16 2005 +++ b/xen/common/schedule.c Wed Aug 17 20:33:56 2005 @@ -474,13 +474,14 @@ set_ac_timer(&schedule_data[cpu].s_timer, now + r_time); - /* Must be protected by the schedule_lock! 
*/ + if ( unlikely(prev == next) ) + { + spin_unlock_irq(&schedule_data[cpu].schedule_lock); + return continue_running(prev); + } + + clear_bit(_VCPUF_running, &prev->vcpu_flags); set_bit(_VCPUF_running, &next->vcpu_flags); - - spin_unlock_irq(&schedule_data[cpu].schedule_lock); - - if ( unlikely(prev == next) ) - return continue_running(prev); perfc_incrc(sched_ctx); @@ -517,6 +518,10 @@ next->domain->domain_id, next->vcpu_id); context_switch(prev, next); + + spin_unlock_irq(&schedule_data[cpu].schedule_lock); + + context_switch_finalise(next); } /* No locking needed -- pointer comparison is safe :-) */ diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/asm-x86/e820.h --- a/xen/include/asm-x86/e820.h Tue Aug 16 22:27:16 2005 +++ b/xen/include/asm-x86/e820.h Wed Aug 17 20:33:56 2005 @@ -3,7 +3,7 @@ #include <asm/page.h> -#define E820MAX 32 +#define E820MAX 128 #define E820_RAM 1 #define E820_RESERVED 2 diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/asm-x86/vmx_vmcs.h --- a/xen/include/asm-x86/vmx_vmcs.h Tue Aug 16 22:27:16 2005 +++ b/xen/include/asm-x86/vmx_vmcs.h Wed Aug 17 20:33:56 2005 @@ -28,10 +28,10 @@ extern void stop_vmx(void); #if defined (__x86_64__) -extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n); +extern void vmx_load_msrs(struct vcpu *n); void vmx_restore_msrs(struct vcpu *d); #else -#define vmx_load_msrs(_p, _n) ((void)0) +#define vmx_load_msrs(_n) ((void)0) #define vmx_restore_msrs(_v) ((void)0) #endif diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Tue Aug 16 22:27:16 2005 +++ b/xen/include/xen/sched.h Wed Aug 17 20:33:56 2005 @@ -258,12 +258,32 @@ extern void sync_lazy_execstate_all(void); extern int __sync_lazy_execstate(void); -/* Called by the scheduler to switch to another vcpu. */ +/* + * Called by the scheduler to switch to another VCPU. On entry, although + * VCPUF_running is no longer asserted for @prev, its context is still running + * on the local CPU and is not committed to memory. 
The local scheduler lock + * is therefore still held, and interrupts are disabled, because the local CPU + * is in an inconsistent state. + * + * The callee must ensure that the local CPU is no longer running in @prev's + * context, and that the context is saved to memory, before returning. + * Alternatively, if implementing lazy context switching, it suffices to ensure + * that invoking __sync_lazy_execstate() will switch and commit @prev's state. + */ extern void context_switch( struct vcpu *prev, struct vcpu *next); -/* Called by the scheduler to continue running the current vcpu. */ +/* + * On some architectures (notably x86) it is not possible to entirely load + * @next's context with interrupts disabled. These may implement a function to + * finalise loading the new context after interrupts are re-enabled. This + * function is not given @prev and is not permitted to access it. + */ +extern void context_switch_finalise( + struct vcpu *next); + +/* Called by the scheduler to continue running the current VCPU. */ extern void continue_running( struct vcpu *same); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |