[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] merge?
# HG changeset patch # User cl349@xxxxxxxxxxxxxxxxxxxx # Node ID 522bc50588eda1c0bba0562a16fe8edd1a715f09 # Parent 6783e59e1c45c858d76d0e101ac6f9a5a8fef4a7 # Parent df11e0709383f518b0c09ba50ec8d051170bb3c0 merge? diff -r 6783e59e1c45 -r 522bc50588ed .hgignore --- a/.hgignore Tue Aug 23 18:25:51 2005 +++ b/.hgignore Tue Aug 23 18:27:22 2005 @@ -147,6 +147,7 @@ ^tools/xcs/xcsdump$ ^tools/xcutils/xc_restore$ ^tools/xcutils/xc_save$ +^tools/xenstat/xentop/xentop$ ^tools/xenstore/testsuite/tmp/.*$ ^tools/xenstore/xen$ ^tools/xenstore/xenstored$ diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Tue Aug 23 18:27:22 2005 @@ -807,7 +807,107 @@ # CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y -# CONFIG_USB is not set +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +# CONFIG_USB_DEVICEFS is not set +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_OTG is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_EHCI_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_BLUETOOTH_TTY is not set +# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# +# CONFIG_USB_STORAGE is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT=y +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_MTOUCH is not set +# CONFIG_USB_EGALAX is not set +# 
CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB Multimedia devices +# +# CONFIG_USB_DABUSB is not set + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set +CONFIG_USB_MON=y + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_IDMOUSE is not set + +# +# USB ATM/DSL drivers +# # # USB Gadget Support diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Tue Aug 23 18:27:22 2005 @@ -149,12 +149,12 @@ if (cpu_is_offline(cpu)) { local_irq_disable(); +#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU) /* Ack it. From this point on until we get woken up, we're not allowed to take any locks. In particular, don't printk. */ __get_cpu_var(cpu_state) = CPU_DEAD; -#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU) /* Tell hypervisor to take vcpu down. 
*/ HYPERVISOR_vcpu_down(cpu); #endif diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug 23 18:27:22 2005 @@ -1575,19 +1575,20 @@ /* Make sure we have a correctly sized P->M table. */ if (max_pfn != xen_start_info.nr_pages) { phys_to_machine_mapping = alloc_bootmem_low_pages( - max_pfn * sizeof(unsigned long)); + max_pfn * sizeof(unsigned int)); if (max_pfn > xen_start_info.nr_pages) { /* set to INVALID_P2M_ENTRY */ memset(phys_to_machine_mapping, ~0, - max_pfn * sizeof(unsigned long)); + max_pfn * sizeof(unsigned int)); memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - xen_start_info.nr_pages * sizeof(unsigned long)); + (unsigned int *)xen_start_info.mfn_list, + xen_start_info.nr_pages * sizeof(unsigned int)); } else { memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - max_pfn * sizeof(unsigned long)); + (unsigned int *)xen_start_info.mfn_list, + max_pfn * sizeof(unsigned int)); + /* N.B. below relies on sizeof(int) == sizeof(long). 
*/ if (HYPERVISOR_dom_mem_op( MEMOP_decrease_reservation, (unsigned long *)xen_start_info.mfn_list + max_pfn, @@ -1597,11 +1598,11 @@ free_bootmem( __pa(xen_start_info.mfn_list), PFN_PHYS(PFN_UP(xen_start_info.nr_pages * - sizeof(unsigned long)))); + sizeof(unsigned int)))); } pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); - for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) + for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned int)), j++ ) { pfn_to_mfn_frame_list[j] = virt_to_mfn(&phys_to_machine_mapping[i]); diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Tue Aug 23 18:27:22 2005 @@ -281,7 +281,7 @@ siginfo_t info; /* Set the "privileged fault" bit to something sane. */ - error_code &= 3; + error_code &= ~4; error_code |= (regs->xcs & 2) << 1; if (regs->eflags & X86_EFLAGS_VM) error_code |= 4; diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/i386/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 23 18:27:22 2005 @@ -348,9 +348,12 @@ { unsigned long vaddr; pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base; + int i; swapper_pg_dir = pgd_base; init_mm.pgd = pgd_base; + for (i = 0; i < NR_CPUS; i++) + per_cpu(cur_pgd, i) = pgd_base; /* Enable PSE if available */ if (cpu_has_pse) { diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 23 18:27:22 2005 @@ -36,6 +36,8 @@ { } +#ifdef __i386__ + void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) { return NULL; @@ -44,6 +46,8 @@ void __init bt_iounmap(void *addr, unsigned long size) { } + +#endif /* __i386__ */ #else @@ 
-58,7 +62,7 @@ extern unsigned long max_low_pfn; unsigned long mfn = address >> PAGE_SHIFT; unsigned long pfn = mfn_to_pfn(mfn); - return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn)); + return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn)); } /* @@ -126,10 +130,12 @@ return NULL; area->phys_addr = phys_addr; addr = (void __iomem *) area->addr; + flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; +#ifdef __x86_64__ + flags |= _PAGE_USER; +#endif if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr, - size, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_DIRTY | _PAGE_ACCESSED - | flags), domid)) { + size, __pgprot(flags), domid)) { vunmap((void __force *) addr); return NULL; } @@ -218,6 +224,8 @@ kfree(p); } +#ifdef __i386__ + void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) { unsigned long offset, last_addr; @@ -288,6 +296,8 @@ --nrpages; } } + +#endif /* __i386__ */ #endif /* CONFIG_XEN_PHYSDEV_ACCESS */ @@ -346,7 +356,7 @@ * Fill in the machine address: PTE ptr is done later by * __direct_remap_area_pages(). 
*/ - v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot); + v->val = pte_val_ma(pfn_pte_ma(machine_addr >> PAGE_SHIFT, prot)); machine_addr += PAGE_SIZE; address += PAGE_SIZE; diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Tue Aug 23 18:27:22 2005 @@ -40,38 +40,82 @@ EXPORT_SYMBOL(gnttab_end_foreign_transfer); EXPORT_SYMBOL(gnttab_alloc_grant_references); EXPORT_SYMBOL(gnttab_free_grant_references); +EXPORT_SYMBOL(gnttab_free_grant_reference); EXPORT_SYMBOL(gnttab_claim_grant_reference); EXPORT_SYMBOL(gnttab_release_grant_reference); EXPORT_SYMBOL(gnttab_grant_foreign_access_ref); EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref); -static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES]; +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) +#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1) + +static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; +static int gnttab_free_count = NR_GRANT_ENTRIES; static grant_ref_t gnttab_free_head; +static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED; static grant_entry_t *shared; -/* - * Lock-free grant-entry allocator - */ - -static inline int -get_free_entry( - void) -{ - grant_ref_t fh, nfh = gnttab_free_head; - do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; } - while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, - gnttab_free_list[fh])) != fh) ); - return fh; +static struct gnttab_free_callback *gnttab_free_callback_list = NULL; + +static int +get_free_entries(int count) +{ + unsigned long flags; + int ref; + grant_ref_t head; + spin_lock_irqsave(&gnttab_list_lock, flags); + if (gnttab_free_count < count) { + spin_unlock_irqrestore(&gnttab_list_lock, flags); + return -1; + } + ref = head = gnttab_free_head; + gnttab_free_count -= count; + while (count-- > 1) + head = gnttab_list[head]; + gnttab_free_head = 
gnttab_list[head]; + gnttab_list[head] = GNTTAB_LIST_END; + spin_unlock_irqrestore(&gnttab_list_lock, flags); + return ref; +} + +#define get_free_entry() get_free_entries(1) + +static void +do_free_callbacks(void) +{ + struct gnttab_free_callback *callback = gnttab_free_callback_list, *next; + gnttab_free_callback_list = NULL; + while (callback) { + next = callback->next; + if (gnttab_free_count >= callback->count) { + callback->next = NULL; + callback->fn(callback->arg); + } else { + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + } + callback = next; + } } static inline void -put_free_entry( - grant_ref_t ref) -{ - grant_ref_t fh, nfh = gnttab_free_head; - do { gnttab_free_list[ref] = fh = nfh; wmb(); } - while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) ); +check_free_callbacks(void) +{ + if (unlikely(gnttab_free_callback_list)) + do_free_callbacks(); +} + +static void +put_free_entry(grant_ref_t ref) +{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + gnttab_list[ref] = gnttab_free_head; + gnttab_free_head = ref; + gnttab_free_count++; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); } /* @@ -79,8 +123,7 @@ */ int -gnttab_grant_foreign_access( - domid_t domid, unsigned long frame, int readonly) +gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly) { int ref; @@ -96,8 +139,8 @@ } void -gnttab_grant_foreign_access_ref( - grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) +gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly) { shared[ref].frame = frame; shared[ref].domid = domid; @@ -107,7 +150,7 @@ int -gnttab_query_foreign_access( grant_ref_t ref ) +gnttab_query_foreign_access(grant_ref_t ref) { u16 nflags; @@ -117,7 +160,7 @@ } void -gnttab_end_foreign_access( grant_ref_t ref, int readonly ) +gnttab_end_foreign_access(grant_ref_t ref, int readonly) { u16 flags, 
nflags; @@ -132,8 +175,7 @@ } int -gnttab_grant_foreign_transfer( - domid_t domid, unsigned long pfn ) +gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) { int ref; @@ -149,8 +191,8 @@ } void -gnttab_grant_foreign_transfer_ref( - grant_ref_t ref, domid_t domid, unsigned long pfn ) +gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, + unsigned long pfn) { shared[ref].frame = pfn; shared[ref].domid = domid; @@ -159,8 +201,7 @@ } unsigned long -gnttab_end_foreign_transfer( - grant_ref_t ref) +gnttab_end_foreign_transfer(grant_ref_t ref) { unsigned long frame = 0; u16 flags; @@ -189,59 +230,79 @@ } void -gnttab_free_grant_references( u16 count, grant_ref_t head ) -{ - /* TODO: O(N)...? */ - grant_ref_t to_die = 0, next = head; - int i; - - for ( i = 0; i < count; i++ ) - { - to_die = next; - next = gnttab_free_list[next]; - put_free_entry( to_die ); +gnttab_free_grant_reference(grant_ref_t ref) +{ + + put_free_entry(ref); +} + +void +gnttab_free_grant_references(grant_ref_t head) +{ + grant_ref_t ref; + unsigned long flags; + int count = 1; + if (head == GNTTAB_LIST_END) + return; + spin_lock_irqsave(&gnttab_list_lock, flags); + ref = head; + while (gnttab_list[ref] != GNTTAB_LIST_END) { + ref = gnttab_list[ref]; + count++; } -} - -int -gnttab_alloc_grant_references( u16 count, - grant_ref_t *head, - grant_ref_t *terminal ) -{ - int i; - grant_ref_t h = gnttab_free_head; - - for ( i = 0; i < count; i++ ) - if ( unlikely(get_free_entry() == -1) ) - goto not_enough_refs; + gnttab_list[ref] = gnttab_free_head; + gnttab_free_head = head; + gnttab_free_count += count; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} + +int +gnttab_alloc_grant_references(u16 count, grant_ref_t *head) +{ + int h = get_free_entries(count); + + if (h == -1) + return -ENOSPC; *head = h; - *terminal = gnttab_free_head; return 0; - -not_enough_refs: - gnttab_free_head = h; - return -ENOSPC; -} - -int -gnttab_claim_grant_reference( 
grant_ref_t *private_head, - grant_ref_t terminal ) -{ - grant_ref_t g; - if ( unlikely((g = *private_head) == terminal) ) +} + +int +gnttab_claim_grant_reference(grant_ref_t *private_head) +{ + grant_ref_t g = *private_head; + if (unlikely(g == GNTTAB_LIST_END)) return -ENOSPC; - *private_head = gnttab_free_list[g]; + *private_head = gnttab_list[g]; return g; } void -gnttab_release_grant_reference( grant_ref_t *private_head, - grant_ref_t release ) -{ - gnttab_free_list[release] = *private_head; +gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) +{ + gnttab_list[release] = *private_head; *private_head = release; +} + +void +gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, u16 count) +{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + if (callback->next) + goto out; + callback->fn = fn; + callback->arg = arg; + callback->count = count; + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + check_free_callbacks(); + out: + spin_unlock_irqrestore(&gnttab_list_lock, flags); } /* @@ -252,8 +313,9 @@ static struct proc_dir_entry *grant_pde; -static int grant_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long data) +static int +grant_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long data) { int ret; privcmd_hypercall_t hypercall; @@ -291,8 +353,9 @@ ioctl: grant_ioctl, }; -static int grant_read(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int +grant_read(char *page, char **start, off_t off, int count, int *eof, + void *data) { int len; unsigned int i; @@ -321,8 +384,9 @@ return len; } -static int grant_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static int +grant_write(struct file *file, const char __user *buffer, unsigned long count, + void *data) { /* TODO: implement this */ return -ENOSYS; @@ 
-330,7 +394,8 @@ #endif /* CONFIG_PROC_FS */ -int gnttab_resume(void) +int +gnttab_resume(void) { gnttab_setup_table_t setup; unsigned long frames[NR_GRANT_FRAMES]; @@ -349,7 +414,8 @@ return 0; } -int gnttab_suspend(void) +int +gnttab_suspend(void) { int i; @@ -359,7 +425,8 @@ return 0; } -static int __init gnttab_init(void) +static int __init +gnttab_init(void) { int i; @@ -368,7 +435,7 @@ shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END); for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) - gnttab_free_list[i] = i + 1; + gnttab_list[i] = i + 1; #ifdef CONFIG_PROC_FS /* diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Aug 23 18:27:22 2005 @@ -44,7 +44,7 @@ c-obj-$(CONFIG_MODULES) += module.o -#obj-y += topology.o +obj-y += topology.o c-obj-y += intel_cacheinfo.o bootflag-y += ../../../i386/kernel/bootflag.o diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Tue Aug 23 18:27:22 2005 @@ -778,21 +778,21 @@ /* Make sure we have a large enough P->M table. 
*/ if (end_pfn > xen_start_info.nr_pages) { phys_to_machine_mapping = alloc_bootmem( - max_pfn * sizeof(unsigned long)); + max_pfn * sizeof(u32)); memset(phys_to_machine_mapping, ~0, - max_pfn * sizeof(unsigned long)); + max_pfn * sizeof(u32)); memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - xen_start_info.nr_pages * sizeof(unsigned long)); + (u32 *)xen_start_info.mfn_list, + xen_start_info.nr_pages * sizeof(u32)); free_bootmem( __pa(xen_start_info.mfn_list), PFN_PHYS(PFN_UP(xen_start_info.nr_pages * - sizeof(unsigned long)))); + sizeof(u32)))); } pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE); - for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) + for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(u32)), j++ ) { pfn_to_mfn_frame_list[j] = virt_to_mfn(&phys_to_machine_mapping[i]); diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile Tue Aug 23 18:27:22 2005 @@ -6,10 +6,10 @@ CFLAGS += -Iarch/$(XENARCH)/mm -obj-y := init.o fault.o ioremap.o pageattr.o +obj-y := init.o fault.o pageattr.o c-obj-y := extable.o -i386-obj-y := hypervisor.o +i386-obj-y := hypervisor.o ioremap.o #obj-y := init.o fault.o ioremap.o extable.o pageattr.o #c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 23 18:27:22 2005 @@ -559,6 +559,11 @@ void __init xen_init_pt(void) { + int i; + + for (i = 0; i < NR_CPUS; i++) + per_cpu(cur_pgd, i) = init_mm.pgd; + memcpy((void *)init_level4_pgt, (void *)xen_start_info.pt_base, PAGE_SIZE); diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- 
a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Aug 23 18:27:22 2005 @@ -65,9 +65,6 @@ static PEND_RING_IDX pending_prod, pending_cons; #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -static kmem_cache_t *buffer_head_cachep; -#else static request_queue_t *plugged_queue; static inline void flush_plugged_queue(void) { @@ -80,7 +77,6 @@ plugged_queue = NULL; } } -#endif /* When using grant tables to map a frame for device access then the * handle returned must be used to unmap the frame. This is needed to @@ -184,11 +180,7 @@ blkif_t *blkif; struct list_head *ent; - daemonize( -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - "xenblkd" -#endif - ); + daemonize("xenblkd"); for ( ; ; ) { @@ -215,11 +207,7 @@ } /* Push the batch through to disc. */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - run_task_queue(&tq_disk); -#else flush_plugged_queue(); -#endif } } @@ -268,13 +256,6 @@ } } -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -static void end_block_io_op(struct buffer_head *bh, int uptodate) -{ - __end_block_io_op(bh->b_private, uptodate); - kmem_cache_free(buffer_head_cachep, bh); -} -#else static int end_block_io_op(struct bio *bio, unsigned int done, int error) { if ( bio->bi_size != 0 ) @@ -283,7 +264,6 @@ bio_put(bio); return error; } -#endif /****************************************************************************** @@ -357,13 +337,9 @@ unsigned long buf; unsigned int nsec; } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int nseg; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - struct buffer_head *bh; -#else struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int nbio = 0; request_queue_t *q; -#endif /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; @@ -434,49 +410,6 @@ pending_req->operation = operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - - atomic_set(&pending_req->pendcnt, nseg); - pending_cons++; - blkif_get(blkif); - - for ( i = 0; i < nseg; i++ ) - { - bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL); - if ( unlikely(bh == NULL) ) - { - __end_block_io_op(pending_req, 0); - continue; - } - - memset(bh, 0, sizeof (struct buffer_head)); - - init_waitqueue_head(&bh->b_wait); - bh->b_size = seg[i].nsec << 9; - bh->b_dev = preq.dev; - bh->b_rdev = preq.dev; - bh->b_rsector = (unsigned long)preq.sector_number; - bh->b_data = (char *)MMAP_VADDR(pending_idx, i) + - (seg[i].buf & ~PAGE_MASK); - bh->b_page = virt_to_page(MMAP_VADDR(pending_idx, i)); - bh->b_end_io = end_block_io_op; - bh->b_private = pending_req; - - bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | - (1 << BH_Req) | (1 << BH_Launder); - if ( operation == WRITE ) - bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate); - - atomic_set(&bh->b_count, 1); - - /* Dispatch a single request. We'll flush it to disc later. 
*/ - generic_make_request(operation, bh); - - preq.sector_number += seg[i].nsec; - } - -#else for ( i = 0; i < nseg; i++ ) { @@ -526,8 +459,6 @@ for ( i = 0; i < nbio; i++ ) submit_bio(operation, biolist[i]); -#endif - return; bad_descriptor: @@ -595,12 +526,6 @@ if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) BUG(); -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - buffer_head_cachep = kmem_cache_create( - "buffer_head_cache", sizeof(struct buffer_head), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); -#endif - blkif_xenbus_init(); memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES ); diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Aug 23 18:27:22 2005 @@ -5,7 +5,6 @@ #include <linux/config.h> #include <linux/version.h> #include <linux/module.h> -#include <linux/rbtree.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/blkdev.h> @@ -30,12 +29,13 @@ #define DPRINTK(_f, _a...) ((void)0) #endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -typedef struct rb_root rb_root_t; -typedef struct rb_node rb_node_t; -#else -struct block_device; -#endif +struct vbd { + blkif_vdev_t handle; /* what the domain refers to this vbd as */ + unsigned char readonly; /* Non-zero -> read-only */ + unsigned char type; /* VDISK_xxx */ + blkif_pdev_t pdevice; /* phys device that this vbd maps to */ + struct block_device *bdev; +}; typedef struct blkif_st { /* Unique identifier for this interface. */ @@ -48,25 +48,18 @@ /* Comms information. */ blkif_back_ring_t blk_ring; /* VBDs attached to this interface. */ - rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs.*/ - spinlock_t vbd_lock; /* Protects VBD mapping. */ + struct vbd vbd; /* Private fields. 
*/ enum { DISCONNECTED, CONNECTED } status; - /* - * DISCONNECT response is deferred until pending requests are ack'ed. - * We therefore need to store the id from the original request. - */ - u8 disconnect_rspid; #ifdef CONFIG_XEN_BLKDEV_TAP_BE /* Is this a blktap frontend */ unsigned int is_blktap; #endif - struct blkif_st *hash_next; struct list_head blkdev_list; spinlock_t blk_ring_lock; atomic_t refcnt; - struct work_struct work; + struct work_struct free_work; u16 shmem_handle; unsigned long shmem_vaddr; grant_ref_t shmem_ref; @@ -77,30 +70,25 @@ void blkif_connect(blkif_be_connect_t *connect); int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id); void blkif_disconnect_complete(blkif_t *blkif); -blkif_t *blkif_find(domid_t domid); -void free_blkif(blkif_t *blkif); +blkif_t *alloc_blkif(domid_t domid); +void free_blkif_callback(blkif_t *blkif); int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ do { \ if ( atomic_dec_and_test(&(_b)->refcnt) ) \ - free_blkif(_b); \ + free_blkif_callback(_b); \ } while (0) -struct vbd; -void vbd_free(blkif_t *blkif, struct vbd *vbd); - -/* Creates inactive vbd. */ -struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, int readonly); -int vbd_is_active(struct vbd *vbd); -void vbd_activate(blkif_t *blkif, struct vbd *vbd); +/* Create a vbd. 
*/ +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, + int readonly); +void vbd_free(struct vbd *vbd); unsigned long vbd_size(struct vbd *vbd); unsigned int vbd_info(struct vbd *vbd); unsigned long vbd_secsize(struct vbd *vbd); -void vbd_destroy(blkif_be_vbd_destroy_t *delete); -void destroy_all_vbds(blkif_t *blkif); struct phys_req { unsigned short dev; diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Aug 23 18:27:22 2005 @@ -9,27 +9,11 @@ #include "common.h" #include <asm-xen/evtchn.h> -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define VMALLOC_VMADDR(x) ((unsigned long)(x)) -#endif +static kmem_cache_t *blkif_cachep; -#define BLKIF_HASHSZ 1024 -#define BLKIF_HASH(_d) (((int)(_d))&(BLKIF_HASHSZ-1)) - -static kmem_cache_t *blkif_cachep; -static blkif_t *blkif_hash[BLKIF_HASHSZ]; - -blkif_t *blkif_find(domid_t domid) +blkif_t *alloc_blkif(domid_t domid) { - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid)]; - - while (blkif) { - if (blkif->domid == domid) { - blkif_get(blkif); - return blkif; - } - blkif = blkif->hash_next; - } + blkif_t *blkif; blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); if (!blkif) @@ -38,12 +22,9 @@ memset(blkif, 0, sizeof(*blkif)); blkif->domid = domid; blkif->status = DISCONNECTED; - spin_lock_init(&blkif->vbd_lock); spin_lock_init(&blkif->blk_ring_lock); atomic_set(&blkif->refcnt, 1); - blkif->hash_next = blkif_hash[BLKIF_HASH(domid)]; - blkif_hash[BLKIF_HASH(domid)] = blkif; return blkif; } @@ -55,7 +36,7 @@ op.flags = GNTMAP_host_map; op.ref = shared_page; op.dom = blkif->domid; - + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); if (op.handle < 0) { @@ -91,7 +72,7 @@ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) return -ENOMEM; - err = map_frontend_page(blkif, VMALLOC_VMADDR(vma->addr), 
shared_page); + err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page); if (err) { vfree(vma->addr); return err; @@ -123,10 +104,10 @@ return 0; } -void free_blkif(blkif_t *blkif) +static void free_blkif(void *arg) { - blkif_t **pblkif; evtchn_op_t op = { .cmd = EVTCHNOP_close }; + blkif_t *blkif = (blkif_t *)arg; op.u.close.port = blkif->evtchn; op.u.close.dom = DOMID_SELF; @@ -134,6 +115,8 @@ op.u.close.port = blkif->remote_evtchn; op.u.close.dom = blkif->domid; HYPERVISOR_event_channel_op(&op); + + vbd_free(&blkif->vbd); if (blkif->evtchn) unbind_evtchn_from_irqhandler(blkif->evtchn, blkif); @@ -143,20 +126,17 @@ vfree(blkif->blk_ring.sring); } - pblkif = &blkif_hash[BLKIF_HASH(blkif->domid)]; - while ( *pblkif != blkif ) - { - BUG_ON(!*pblkif); - pblkif = &(*pblkif)->hash_next; - } - *pblkif = blkif->hash_next; - destroy_all_vbds(blkif); kmem_cache_free(blkif_cachep, blkif); +} + +void free_blkif_callback(blkif_t *blkif) +{ + INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif); + schedule_work(&blkif->free_work); } void __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 0, 0, NULL, NULL); - memset(blkif_hash, 0, sizeof(blkif_hash)); } diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Tue Aug 23 18:27:22 2005 @@ -2,10 +2,6 @@ * blkback/vbd.c * * Routines for managing virtual block devices (VBDs). - * - * NOTE: vbd_lock protects updates to the rb_tree against concurrent lookups - * in vbd_translate. All other lookups are implicitly protected because the - * only caller (the control message dispatch routine) serializes the calls. 
* * Copyright (c) 2003-2005, Keir Fraser & Steve Hand */ @@ -13,28 +9,13 @@ #include "common.h" #include <asm-xen/xenbus.h> -struct vbd { - blkif_vdev_t handle; /* what the domain refers to this vbd as */ - unsigned char readonly; /* Non-zero -> read-only */ - unsigned char type; /* VDISK_xxx */ - blkif_pdev_t pdevice; /* phys device that this vbd maps to */ - struct block_device *bdev; - - int active; - rb_node_t rb; /* for linking into R-B tree lookup struct */ -}; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) static inline dev_t vbd_map_devnum(blkif_pdev_t cookie) -{ return MKDEV(cookie>>8, cookie&0xff); } +{ + return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie)); +} #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) #define bdev_put(_b) blkdev_put(_b) -#else -#define vbd_sz(_v) (blk_size[MAJOR((_v)->pdevice)][MINOR((_v)->pdevice)]*2) -#define bdev_put(_b) ((void)0) -#define bdev_hardsect_size(_b) 512 -#endif unsigned long vbd_size(struct vbd *vbd) { @@ -51,45 +32,32 @@ return bdev_hardsect_size(vbd->bdev); } -int vbd_is_active(struct vbd *vbd) +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, + blkif_pdev_t pdevice, int readonly) { - return vbd->active; -} + struct vbd *vbd; -struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t handle, - blkif_pdev_t pdevice, int readonly) -{ - struct vbd *vbd; - - if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) ) - { - DPRINTK("vbd_create: out of memory\n"); - return ERR_PTR(-ENOMEM); - } - + vbd = &blkif->vbd; vbd->handle = handle; vbd->readonly = readonly; vbd->type = 0; - vbd->active = 0; vbd->pdevice = pdevice; - /* FIXME: Who frees vbd on failure? --RR */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) vbd->bdev = open_by_devnum( vbd_map_devnum(vbd->pdevice), vbd->readonly ? 
FMODE_READ : FMODE_WRITE); if ( IS_ERR(vbd->bdev) ) { DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); - return ERR_PTR(-ENOENT); + return -ENOENT; } if ( (vbd->bdev->bd_disk == NULL) ) { DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); - bdev_put(vbd->bdev); - return ERR_PTR(-ENOENT); + vbd_free(vbd); + return -ENOENT; } if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD ) @@ -97,121 +65,27 @@ if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE ) vbd->type |= VDISK_REMOVABLE; -#else - if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) ) - { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); - return ERR_PTR(-ENOENT); - } -#endif - DPRINTK("Successful creation of handle=%04x (dom=%u)\n", handle, blkif->domid); - return vbd; + return 0; } -void vbd_activate(blkif_t *blkif, struct vbd *vbd) +void vbd_free(struct vbd *vbd) { - rb_node_t **rb_p, *rb_parent = NULL; - struct vbd *i; - BUG_ON(vbd_is_active(vbd)); - - /* Find where to put it. */ - rb_p = &blkif->vbd_rb.rb_node; - while ( *rb_p != NULL ) - { - rb_parent = *rb_p; - i = rb_entry(rb_parent, struct vbd, rb); - if ( vbd->handle < i->handle ) - { - rb_p = &rb_parent->rb_left; - } - else if ( vbd->handle > i->handle ) - { - rb_p = &rb_parent->rb_right; - } - else - { - /* We never create two of same vbd, so not possible. */ - BUG(); - } - } - - /* Now we're active. 
*/ - vbd->active = 1; - blkif_get(blkif); - - spin_lock(&blkif->vbd_lock); - rb_link_node(&vbd->rb, rb_parent, rb_p); - rb_insert_color(&vbd->rb, &blkif->vbd_rb); - spin_unlock(&blkif->vbd_lock); -} - -void vbd_free(blkif_t *blkif, struct vbd *vbd) -{ - if (vbd_is_active(vbd)) { - spin_lock(&blkif->vbd_lock); - rb_erase(&vbd->rb, &blkif->vbd_rb); - spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); - } - bdev_put(vbd->bdev); - kfree(vbd); -} - -void destroy_all_vbds(blkif_t *blkif) -{ - struct vbd *vbd; - rb_node_t *rb; - - spin_lock(&blkif->vbd_lock); - - while ( (rb = blkif->vbd_rb.rb_node) != NULL ) - { - vbd = rb_entry(rb, struct vbd, rb); - rb_erase(rb, &blkif->vbd_rb); - spin_unlock(&blkif->vbd_lock); - bdev_put(vbd->bdev); - kfree(vbd); - spin_lock(&blkif->vbd_lock); - blkif_put(blkif); - } - - spin_unlock(&blkif->vbd_lock); + if (vbd->bdev) + bdev_put(vbd->bdev); + vbd->bdev = NULL; } int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) { - struct vbd *vbd; - rb_node_t *rb; - int rc = -EACCES; + struct vbd *vbd = &blkif->vbd; + int rc = -EACCES; - /* Take the vbd_lock because another thread could be updating the tree. 
*/ - spin_lock(&blkif->vbd_lock); - - rb = blkif->vbd_rb.rb_node; - while ( rb != NULL ) - { - vbd = rb_entry(rb, struct vbd, rb); - if ( req->dev < vbd->handle ) - rb = rb->rb_left; - else if ( req->dev > vbd->handle ) - rb = rb->rb_right; - else - goto found; - } - - DPRINTK("vbd_translate; domain %u attempted to access " - "non-existent VBD.\n", blkif->domid); - rc = -ENODEV; - goto out; - - found: - - if ( (operation == WRITE) && vbd->readonly ) + if ((operation == WRITE) && vbd->readonly) goto out; - if ( unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)) ) + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) goto out; req->dev = vbd->pdevice; @@ -219,6 +93,5 @@ rc = 0; out: - spin_unlock(&blkif->vbd_lock); return rc; } diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Aug 23 18:27:22 2005 @@ -26,7 +26,6 @@ /* our communications channel */ blkif_t *blkif; - struct vbd *vbd; long int frontend_id; long int pdev; @@ -47,8 +46,6 @@ if (be->watch.node) unregister_xenbus_watch(&be->watch); unregister_xenbus_watch(&be->backend_watch); - if (be->vbd) - vbd_free(be->blkif, be->vbd); if (be->blkif) blkif_put(be->blkif); if (be->frontpath) @@ -72,7 +69,7 @@ device_unregister(&be->dev->dev); return; } - if (vbd_is_active(be->vbd)) + if (be->blkif->status == CONNECTED) return; err = xenbus_gather(be->frontpath, "grant-id", "%lu", &sharedmfn, @@ -85,9 +82,8 @@ } /* Domains must use same shared frame for all vbds. 
*/ - if (be->blkif->status == CONNECTED && - (evtchn != be->blkif->remote_evtchn || - sharedmfn != be->blkif->shmem_frame)) { + if (evtchn != be->blkif->remote_evtchn || + sharedmfn != be->blkif->shmem_frame) { xenbus_dev_error(be->dev, err, "Shared frame/evtchn %li/%u not same as" " old %li/%u", @@ -105,7 +101,7 @@ } err = xenbus_printf(be->dev->nodename, "sectors", "%lu", - vbd_size(be->vbd)); + vbd_size(&be->blkif->vbd)); if (err) { xenbus_dev_error(be->dev, err, "writing %s/sectors", be->dev->nodename); @@ -114,33 +110,28 @@ /* FIXME: use a typename instead */ err = xenbus_printf(be->dev->nodename, "info", "%u", - vbd_info(be->vbd)); + vbd_info(&be->blkif->vbd)); if (err) { xenbus_dev_error(be->dev, err, "writing %s/info", be->dev->nodename); goto abort; } err = xenbus_printf(be->dev->nodename, "sector-size", "%lu", - vbd_secsize(be->vbd)); + vbd_secsize(&be->blkif->vbd)); if (err) { xenbus_dev_error(be->dev, err, "writing %s/sector-size", be->dev->nodename); goto abort; } - /* First vbd? We need to map the shared frame, irq etc. */ - if (be->blkif->status != CONNECTED) { - err = blkif_map(be->blkif, sharedmfn, evtchn); - if (err) { - xenbus_dev_error(be->dev, err, - "mapping shared-frame %lu port %u", - sharedmfn, evtchn); - goto abort; - } - } - - /* We're ready, activate. */ - vbd_activate(be->blkif, be->vbd); + /* Map the shared frame, irq etc. 
*/ + err = blkif_map(be->blkif, sharedmfn, evtchn); + if (err) { + xenbus_dev_error(be->dev, err, + "mapping shared-frame %lu port %u", + sharedmfn, evtchn); + goto abort; + } xenbus_transaction_end(0); xenbus_dev_ok(be->dev); @@ -228,20 +219,16 @@ p = strrchr(be->frontpath, '/') + 1; handle = simple_strtoul(p, NULL, 0); - be->blkif = blkif_find(be->frontend_id); + be->blkif = alloc_blkif(be->frontend_id); if (IS_ERR(be->blkif)) { err = PTR_ERR(be->blkif); be->blkif = NULL; goto device_fail; } - be->vbd = vbd_create(be->blkif, handle, be->pdev, - be->readonly); - if (IS_ERR(be->vbd)) { - err = PTR_ERR(be->vbd); - be->vbd = NULL; + err = vbd_create(be->blkif, handle, be->pdev, be->readonly); + if (err) goto device_fail; - } frontend_changed(&be->watch, be->frontpath); } diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Aug 23 18:27:22 2005 @@ -63,25 +63,16 @@ /* Control whether runtime update of vbds is enabled. 
*/ #define ENABLE_VBD_UPDATE 1 -#define BLKIF_STATE_CLOSED 0 -#define BLKIF_STATE_DISCONNECTED 1 -#define BLKIF_STATE_CONNECTED 2 - -static unsigned int blkif_state = BLKIF_STATE_CLOSED; -static unsigned int blkif_evtchn = 0; -static unsigned int blkif_vbds = 0; -static unsigned int blkif_vbds_connected = 0; - -static blkif_front_ring_t blk_ring; +#define BLKIF_STATE_DISCONNECTED 0 +#define BLKIF_STATE_CONNECTED 1 + +static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED; #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) -static domid_t rdomid = 0; -static grant_ref_t gref_head, gref_terminal; #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) #define GRANTREF_INVALID (1<<15) -static int shmem_ref; static struct blk_shadow { blkif_request_t req; @@ -92,7 +83,7 @@ static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */ -static void kick_pending_request_queues(void); +static void kick_pending_request_queues(struct blkfront_info *info); static int __init xlblk_init(void); @@ -119,7 +110,7 @@ /* Kernel-specific definitions used in the common code */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define DISABLE_SCATTERGATHER() +#define DISABLE_SCATTERGATHER() #else static int sg_operation = -1; #define DISABLE_SCATTERGATHER() (sg_operation = -1) @@ -138,11 +129,11 @@ } -static inline void flush_requests(void) +static inline void flush_requests(struct blkfront_info *info) { DISABLE_SCATTERGATHER(); - RING_PUSH_REQUESTS(&blk_ring); - notify_via_evtchn(blkif_evtchn); + RING_PUSH_REQUESTS(&info->ring); + notify_via_evtchn(info->evtchn); } @@ -152,30 +143,39 @@ module_init(xlblk_init); -static struct xlbd_disk_info *head_waiting = NULL; -static void kick_pending_request_queues(void) -{ - struct xlbd_disk_info *di; - while ( ((di = head_waiting) != NULL) && !RING_FULL(&blk_ring) ) - { - head_waiting = di->next_waiting; - di->next_waiting = NULL; - /* Re-enable calldowns. 
*/ - blk_start_queue(di->rq); - /* Kick things off immediately. */ - do_blkif_request(di->rq); - } +static void kick_pending_request_queues(struct blkfront_info *info) +{ + if (!RING_FULL(&info->ring)) { + /* Re-enable calldowns. */ + blk_start_queue(info->rq); + /* Kick things off immediately. */ + do_blkif_request(info->rq); + } +} + +static void blkif_restart_queue(void *arg) +{ + struct blkfront_info *info = (struct blkfront_info *)arg; + spin_lock_irq(&blkif_io_lock); + kick_pending_request_queues(info); + spin_unlock_irq(&blkif_io_lock); +} + +static void blkif_restart_queue_callback(void *arg) +{ + struct blkfront_info *info = (struct blkfront_info *)arg; + schedule_work(&info->work); } int blkif_open(struct inode *inode, struct file *filep) { - struct gendisk *gd = inode->i_bdev->bd_disk; - struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; - - /* Update of usage count is protected by per-device semaphore. */ - di->mi->usage++; - - return 0; + // struct gendisk *gd = inode->i_bdev->bd_disk; + // struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; + + /* Update of usage count is protected by per-device semaphore. */ + // di->mi->usage++; + + return 0; } @@ -192,8 +192,8 @@ int i; DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long)argument, inode->i_rdev); - + command, (long)argument, inode->i_rdev); + switch ( command ) { case HDIO_GETGEO: @@ -219,7 +219,7 @@ /* * blkif_queue_request * - * request block io + * request block io * * id: for guest use only. 
* operation: BLKIF_OP_{READ,WRITE,PROBE} @@ -228,7 +228,7 @@ */ static int blkif_queue_request(struct request *req) { - struct xlbd_disk_info *di = req->rq_disk->private_data; + struct blkfront_info *info = req->rq_disk->private_data; unsigned long buffer_ma; blkif_request_t *ring_req; struct bio *bio; @@ -237,20 +237,28 @@ unsigned long id; unsigned int fsect, lsect; int ref; - - if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) + grant_ref_t gref_head; + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; + if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST, + &gref_head) < 0) { + gnttab_request_free_callback(&info->callback, + blkif_restart_queue_callback, info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + /* Fill out a communications ring structure. */ - ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt); + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); id = GET_ID_FROM_FREELIST(); blk_shadow[id].request = (unsigned long)req; ring_req->id = id; - ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : - BLKIF_OP_READ; + ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; ring_req->sector_number = (blkif_sector_t)req->sector; - ring_req->handle = di->handle; + ring_req->handle = info->handle; ring_req->nr_segments = 0; rq_for_each_bio(bio, req) @@ -263,31 +271,34 @@ fsect = bvec->bv_offset >> 9; lsect = fsect + (bvec->bv_len >> 9) - 1; /* install a grant reference. 
*/ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ref = gnttab_claim_grant_reference(&gref_head); ASSERT( ref != -ENOSPC ); gnttab_grant_foreign_access_ref( ref, - rdomid, + info->backend_id, buffer_ma >> PAGE_SHIFT, rq_data_dir(req) ); blk_shadow[id].frame[ring_req->nr_segments] = buffer_ma >> PAGE_SHIFT; - ring_req->frame_and_sects[ring_req->nr_segments++] = + ring_req->frame_and_sects[ring_req->nr_segments] = blkif_fas_from_gref(ref, fsect, lsect); + + ring_req->nr_segments++; } } - blk_ring.req_prod_pvt++; - + info->ring.req_prod_pvt++; + /* Keep a private copy so we can reissue requests when recovering. */ pickle_request(&blk_shadow[id], ring_req); + gnttab_free_grant_references(gref_head); + return 0; } - /* * do_blkif_request @@ -295,24 +306,26 @@ */ void do_blkif_request(request_queue_t *rq) { - struct xlbd_disk_info *di; + struct blkfront_info *info = NULL; struct request *req; int queued; - DPRINTK("Entered do_blkif_request\n"); + DPRINTK("Entered do_blkif_request\n"); queued = 0; while ( (req = elv_next_request(rq)) != NULL ) { + info = req->rq_disk->private_data; + if ( !blk_fs_request(req) ) { end_request(req, 0); continue; } - if ( RING_FULL(&blk_ring) ) - goto wait; + if (RING_FULL(&info->ring)) + goto wait; DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n", req, req->cmd, req->sector, req->current_nr_sectors, @@ -320,25 +333,19 @@ rq_data_dir(req) ? "write" : "read"); blkdev_dequeue_request(req); - if ( blkif_queue_request(req) ) - { + if (blkif_queue_request(req)) { + blk_requeue_request(rq, req); wait: - di = req->rq_disk->private_data; - if ( di->next_waiting == NULL ) - { - di->next_waiting = head_waiting; - head_waiting = di; - /* Avoid pointless unplugs. */ - blk_stop_queue(rq); - } - break; + /* Avoid pointless unplugs. 
*/ + blk_stop_queue(rq); + break; } queued++; } if ( queued != 0 ) - flush_requests(); + flush_requests(info); } @@ -347,25 +354,24 @@ struct request *req; blkif_response_t *bret; RING_IDX i, rp; - unsigned long flags; - - spin_lock_irqsave(&blkif_io_lock, flags); - - if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) || - unlikely(recovery) ) - { + unsigned long flags; + struct blkfront_info *info = (struct blkfront_info *)dev_id; + + spin_lock_irqsave(&blkif_io_lock, flags); + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { spin_unlock_irqrestore(&blkif_io_lock, flags); return IRQ_HANDLED; } - - rp = blk_ring.sring->rsp_prod; + + rp = info->ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ - for ( i = blk_ring.rsp_cons; i != rp; i++ ) + for ( i = info->ring.rsp_cons; i != rp; i++ ) { unsigned long id; - bret = RING_GET_RESPONSE(&blk_ring, i); + bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; req = (struct request *)blk_shadow[id].request; @@ -382,7 +388,7 @@ bret->status); if ( unlikely(end_that_request_first - (req, + (req, (bret->status == BLKIF_RSP_OKAY), req->hard_nr_sectors)) ) BUG(); @@ -394,9 +400,9 @@ } } - blk_ring.rsp_cons = i; - - kick_pending_request_queues(); + info->ring.rsp_cons = i; + + kick_pending_request_queues(info); spin_unlock_irqrestore(&blkif_io_lock, flags); @@ -425,31 +431,31 @@ static void kick_pending_request_queues(void) { /* We kick pending request queues if the ring is reasonably empty. */ - if ( (nr_pending != 0) && - (RING_PENDING_REQUESTS(&blk_ring) < (BLK_RING_SIZE >> 1)) ) + if ( (nr_pending != 0) && + (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) ) { /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ - while ( (nr_pending != 0) && !RING_FULL(&blk_ring) ) + while ( (nr_pending != 0) && !RING_FULL(&info->ring) ) do_blkif_request(pending_queues[--nr_pending]); } } int blkif_open(struct inode *inode, struct file *filep) { - short xldev = inode->i_rdev; + short xldev = inode->i_rdev; struct gendisk *gd = get_gendisk(xldev); xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - short minor = MINOR(xldev); + short minor = MINOR(xldev); if ( gd->part[minor].nr_sects == 0 ) - { + { /* * Device either doesn't exist, or has zero capacity; we use a few * cheesy heuristics to return the relevant error code */ if ( (gd->sizes[minor >> gd->minor_shift] != 0) || ((minor & (gd->max_p - 1)) != 0) ) - { + { /* * We have a real device, but no such partition, or we just have a * partition number so guess this is the problem. @@ -458,16 +464,16 @@ } else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) { - /* This is a removable device => assume that media is missing. */ + /* This is a removable device => assume that media is missing. */ return -ENOMEDIUM; /* media not present (this is a guess) */ - } + } else - { + { /* Just go for the general 'no such device' error. */ return -ENODEV; /* no such device */ } } - + /* Update of usage count is protected by per-device semaphore. */ disk->usage++; @@ -496,24 +502,24 @@ { kdev_t dev = inode->i_rdev; struct hd_geometry *geo = (struct hd_geometry *)argument; - struct gendisk *gd; - struct hd_struct *part; + struct gendisk *gd; + struct hd_struct *part; int i; unsigned short cylinders; byte heads, sectors; /* NB. No need to check permissions. That is done for us. 
*/ - + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long) argument, dev); - + command, (long) argument, dev); + gd = get_gendisk(dev); - part = &gd->part[MINOR(dev)]; + part = &gd->part[MINOR(dev)]; switch ( command ) { case BLKGETSIZE: - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); return put_user(part->nr_sects, (unsigned long *) argument); case BLKGETSIZE64: @@ -526,7 +532,7 @@ return blkif_revalidate(dev); case BLKSSZGET: - return hardsect_size[MAJOR(dev)][MINOR(dev)]; + return hardsect_size[MAJOR(dev)][MINOR(dev)]; case BLKBSZGET: /* get block size */ DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); @@ -552,7 +558,7 @@ values consistent with the size of the device */ heads = 0xff; - sectors = 0x3f; + sectors = 0x3f; cylinders = part->nr_sects / (heads * sectors); if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; @@ -562,7 +568,7 @@ return 0; - case HDIO_GETGEO_BIG: + case HDIO_GETGEO_BIG: DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); if (!argument) return -EINVAL; @@ -570,7 +576,7 @@ values consistent with the size of the device */ heads = 0xff; - sectors = 0x3f; + sectors = 0x3f; cylinders = part->nr_sects / (heads * sectors); if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; @@ -594,7 +600,7 @@ WPRINTK("ioctl %08x not supported by XL blkif\n", command); return -ENOSYS; } - + return 0; } @@ -614,7 +620,7 @@ xl_disk_t *disk; unsigned long capacity; int i, rc = 0; - + if ( (bd = bdget(dev)) == NULL ) return -EINVAL; @@ -662,7 +668,7 @@ /* * blkif_queue_request * - * request block io + * request block io * * id: for guest use only. 
* operation: BLKIF_OP_{READ,WRITE,PROBE} @@ -696,7 +702,7 @@ buffer_ma &= PAGE_MASK; - if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; switch ( operation ) @@ -704,7 +710,7 @@ case BLKIF_OP_READ: case BLKIF_OP_WRITE: - gd = get_gendisk(device); + gd = get_gendisk(device); /* * Update the sector_number we'll pass down as appropriate; note that @@ -714,10 +720,10 @@ sector_number += gd->part[MINOR(device)].start_sect; /* - * If this unit doesn't consist of virtual partitions then we clear + * If this unit doesn't consist of virtual partitions then we clear * the partn bits from the device number. */ - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & + if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) device &= ~(gd->max_p - 1); @@ -725,20 +731,20 @@ (sg_dev == device) && (sg_next_sect == sector_number) ) { - req = RING_GET_REQUEST(&blk_ring, - blk_ring.req_prod_pvt - 1); + req = RING_GET_REQUEST(&info->ring, + info->ring.req_prod_pvt - 1); bh = (struct buffer_head *)id; - + bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request; blk_shadow[req->id].request = (unsigned long)id; /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ref = gnttab_claim_grant_reference(&gref_head); ASSERT( ref != -ENOSPC ); gnttab_grant_foreign_access_ref( ref, - rdomid, + info->backend_id, buffer_ma >> PAGE_SHIFT, ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); @@ -757,7 +763,7 @@ return 0; } - else if ( RING_FULL(&blk_ring) ) + else if ( RING_FULL(&info->ring) ) { return 1; } @@ -774,7 +780,7 @@ } /* Fill out a communications ring structure. 
*/ - req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt); + req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); xid = GET_ID_FROM_FREELIST(); blk_shadow[xid].request = (unsigned long)id; @@ -782,15 +788,15 @@ req->id = xid; req->operation = operation; req->sector_number = (blkif_sector_t)sector_number; - req->handle = handle; + req->handle = handle; req->nr_segments = 1; /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ref = gnttab_claim_grant_reference(&gref_head); ASSERT( ref != -ENOSPC ); gnttab_grant_foreign_access_ref( ref, - rdomid, + info->backend_id, buffer_ma >> PAGE_SHIFT, ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); @@ -798,11 +804,11 @@ req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect); - /* Keep a private copy so we can reissue requests when recovering. */ + /* Keep a private copy so we can reissue requests when recovering. */ pickle_request(&blk_shadow[xid], req); - blk_ring.req_prod_pvt++; - + info->ring.req_prod_pvt++; + return 0; } @@ -817,13 +823,13 @@ struct buffer_head *bh, *next_bh; int rw, nsect, full, queued = 0; - DPRINTK("Entered do_blkif_request\n"); + DPRINTK("Entered do_blkif_request\n"); while ( !rq->plugged && !list_empty(&rq->queue_head)) { - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) + if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) goto out; - + DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", req, req->cmd, req->sector, req->current_nr_sectors, req->nr_sectors, req->bh); @@ -844,16 +850,16 @@ full = blkif_queue_request( (unsigned long)bh, - (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, + (rw == READ) ? 
BLKIF_OP_READ : BLKIF_OP_WRITE, bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); if ( full ) - { + { bh->b_reqnext = next_bh; pending_queues[nr_pending++] = rq; if ( unlikely(nr_pending >= MAX_PENDING) ) BUG(); - goto out; + goto out; } queued++; @@ -861,7 +867,7 @@ /* Dequeue the buffer head from the request. */ nsect = bh->b_size >> 9; bh = req->bh = next_bh; - + if ( bh != NULL ) { /* There's another buffer head to do. Update the request. */ @@ -891,27 +897,27 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) { - RING_IDX i, rp; - unsigned long flags; + RING_IDX i, rp; + unsigned long flags; struct buffer_head *bh, *next_bh; - - spin_lock_irqsave(&io_request_lock, flags); - - if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) ) + + spin_lock_irqsave(&io_request_lock, flags); + + if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) ) { spin_unlock_irqrestore(&io_request_lock, flags); return; } - rp = blk_ring.sring->rsp_prod; + rp = info->ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ - for ( i = blk_ring.rsp_cons; i != rp; i++ ) + for ( i = info->ring.rsp_cons; i != rp; i++ ) { unsigned long id; blkif_response_t *bret; - - bret = RING_GET_RESPONSE(&blk_ring, i); + + bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; bh = (struct buffer_head *)blk_shadow[id].request; @@ -943,8 +949,8 @@ } } - blk_ring.rsp_cons = i; - + info->ring.rsp_cons = i; + kick_pending_request_queues(); spin_unlock_irqrestore(&io_request_lock, flags); @@ -954,24 +960,24 @@ /***************************** COMMON CODE *******************************/ -static void blkif_free(void) +static void blkif_free(struct blkfront_info *info) { /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&blkif_io_lock); - blkif_state = BLKIF_STATE_DISCONNECTED; + info->connected = BLKIF_STATE_DISCONNECTED; spin_unlock_irq(&blkif_io_lock); /* Free resources associated with old device channel. 
*/ - if ( blk_ring.sring != NULL ) - { - free_page((unsigned long)blk_ring.sring); - blk_ring.sring = NULL; - } - unbind_evtchn_from_irqhandler(blkif_evtchn, NULL); - blkif_evtchn = 0; -} - -static void blkif_recover(void) + if ( info->ring.sring != NULL ) + { + free_page((unsigned long)info->ring.sring); + info->ring.sring = NULL; + } + unbind_evtchn_from_irqhandler(info->evtchn, NULL); + info->evtchn = 0; +} + +static void blkif_recover(struct blkfront_info *info) { int i; blkif_request_t *req; @@ -987,7 +993,7 @@ memset(&blk_shadow, 0, sizeof(blk_shadow)); for ( i = 0; i < BLK_RING_SIZE; i++ ) blk_shadow[i].req.id = i+1; - blk_shadow_free = blk_ring.req_prod_pvt; + blk_shadow_free = info->ring.req_prod_pvt; blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; /* Stage 3: Find pending requests and requeue them. */ @@ -999,7 +1005,7 @@ /* Grab a request slot and unpickle shadow state into it. */ req = RING_GET_REQUEST( - &blk_ring, blk_ring.req_prod_pvt); + &info->ring, info->ring.req_prod_pvt); unpickle_request(req, &copy[i]); /* We get a new request id, and must reset the shadow state. */ @@ -1012,7 +1018,7 @@ if ( req->frame_and_sects[j] & GRANTREF_INVALID ) gnttab_grant_foreign_access_ref( blkif_gref_from_fas(req->frame_and_sects[j]), - rdomid, + info->backend_id, blk_shadow[req->id].frame[j], rq_data_dir((struct request *) blk_shadow[req->id].request)); @@ -1020,32 +1026,31 @@ } blk_shadow[req->id].req = *req; - blk_ring.req_prod_pvt++; + info->ring.req_prod_pvt++; } kfree(copy); recovery = 0; - /* blk_ring->req_prod will be set when we flush_requests().*/ + /* info->ring->req_prod will be set when we flush_requests().*/ wmb(); /* Kicks things back into life. */ - flush_requests(); + flush_requests(info); /* Now safe to left other people use the interface. 
*/ - blkif_state = BLKIF_STATE_CONNECTED; -} - -static void blkif_connect(u16 evtchn, domid_t domid) + info->connected = BLKIF_STATE_CONNECTED; +} + +static void blkif_connect(struct blkfront_info *info, u16 evtchn) { int err = 0; - blkif_evtchn = evtchn; - rdomid = domid; + info->evtchn = evtchn; err = bind_evtchn_to_irqhandler( - blkif_evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL); + info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); if ( err != 0 ) { WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); @@ -1059,17 +1064,6 @@ { "" } }; -struct blkfront_info -{ - /* We watch the backend */ - struct xenbus_watch watch; - int vdevice; - u16 handle; - int connected; - struct xenbus_device *dev; - char *backend; -}; - static void watch_for_status(struct xenbus_watch *watch, const char *node) { struct blkfront_info *info; @@ -1081,35 +1075,33 @@ node += strlen(watch->node); /* FIXME: clean up when error on the other end. */ - if (info->connected) + if (info->connected == BLKIF_STATE_CONNECTED) return; - err = xenbus_gather(watch->node, + err = xenbus_gather(watch->node, "sectors", "%lu", &sectors, "info", "%u", &binfo, "sector-size", "%lu", &sector_size, NULL); if (err) { - xenbus_dev_error(info->dev, err, "reading backend fields"); + xenbus_dev_error(info->xbdev, err, "reading backend fields"); return; } - xlvbd_add(sectors, info->vdevice, info->handle, binfo, sector_size); - info->connected = 1; - - /* First to connect? blkif is now connected. */ - if (blkif_vbds_connected++ == 0) - blkif_state = BLKIF_STATE_CONNECTED; - - xenbus_dev_ok(info->dev); + xlvbd_add(sectors, info->vdevice, binfo, sector_size, info); + info->connected = BLKIF_STATE_CONNECTED; + + blkif_state = BLKIF_STATE_CONNECTED; + + xenbus_dev_ok(info->xbdev); /* Kick pending requests. 
*/ spin_lock_irq(&blkif_io_lock); - kick_pending_request_queues(); + kick_pending_request_queues(info); spin_unlock_irq(&blkif_io_lock); } -static int setup_blkring(struct xenbus_device *dev, unsigned int backend_id) +static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { blkif_sring_t *sring; evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound }; @@ -1121,25 +1113,28 @@ return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE); - - shmem_ref = gnttab_claim_grant_reference(&gref_head, - gref_terminal); - ASSERT(shmem_ref != -ENOSPC); - gnttab_grant_foreign_access_ref(shmem_ref, - backend_id, - virt_to_mfn(blk_ring.sring), - 0); - - op.u.alloc_unbound.dom = backend_id; + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + + err = gnttab_grant_foreign_access(info->backend_id, + virt_to_mfn(info->ring.sring), 0); + if (err == -ENOSPC) { + free_page((unsigned long)info->ring.sring); + info->ring.sring = 0; + xenbus_dev_error(dev, err, "granting access to ring page"); + return err; + } + info->grant_id = err; + + op.u.alloc_unbound.dom = info->backend_id; err = HYPERVISOR_event_channel_op(&op); if (err) { - free_page((unsigned long)blk_ring.sring); - blk_ring.sring = 0; + gnttab_end_foreign_access(info->grant_id, 0); + free_page((unsigned long)info->ring.sring); + info->ring.sring = 0; xenbus_dev_error(dev, err, "allocating event channel"); return err; } - blkif_connect(op.u.alloc_unbound.port, backend_id); + blkif_connect(info, op.u.alloc_unbound.port); return 0; } @@ -1149,11 +1144,11 @@ { char *backend; const char *message; - int err, backend_id; + int err; backend = NULL; err = xenbus_gather(dev->nodename, - "backend-id", "%i", &backend_id, + "backend-id", "%i", &info->backend_id, "backend", NULL, &backend, NULL); if (XENBUS_EXIST_ERR(err)) @@ -1168,12 +1163,10 @@ goto out; } - /* First device? We create shared ring, alloc event channel. 
*/ - if (blkif_vbds == 0) { - err = setup_blkring(dev, backend_id); - if (err) - goto out; - } + /* Create shared ring, alloc event channel. */ + err = setup_blkring(dev, info); + if (err) + goto out; err = xenbus_transaction_start(dev->nodename); if (err) { @@ -1181,13 +1174,13 @@ goto destroy_blkring; } - err = xenbus_printf(dev->nodename, "grant-id","%u", shmem_ref); + err = xenbus_printf(dev->nodename, "grant-id","%u", info->grant_id); if (err) { message = "writing grant-id"; goto abort_transaction; } err = xenbus_printf(dev->nodename, - "event-channel", "%u", blkif_evtchn); + "event-channel", "%u", info->evtchn); if (err) { message = "writing event-channel"; goto abort_transaction; @@ -1220,8 +1213,7 @@ /* Have to do this *outside* transaction. */ xenbus_dev_error(dev, err, "%s", message); destroy_blkring: - if (blkif_vbds == 0) - blkif_free(); + blkif_free(info); goto out; } @@ -1250,9 +1242,11 @@ xenbus_dev_error(dev, err, "allocating info structure"); return err; } - info->dev = dev; + info->xbdev = dev; info->vdevice = vdevice; - info->connected = 0; + info->connected = BLKIF_STATE_DISCONNECTED; + info->mi = NULL; + INIT_WORK(&info->work, blkif_restart_queue, (void *)info); /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); @@ -1266,7 +1260,6 @@ /* Call once in case entries already there. 
*/ watch_for_status(&info->watch, info->watch.node); - blkif_vbds++; return 0; } @@ -1277,15 +1270,13 @@ if (info->backend) unregister_xenbus_watch(&info->watch); - if (info->connected) { - xlvbd_del(info->handle); - blkif_vbds_connected--; - } + if (info->mi) + xlvbd_del(info); + + blkif_free(info); + kfree(info->backend); kfree(info); - - if (--blkif_vbds == 0) - blkif_free(); return 0; } @@ -1298,10 +1289,8 @@ kfree(info->backend); info->backend = NULL; - if (--blkif_vbds == 0) { - recovery = 1; - blkif_free(); - } + recovery = 1; + blkif_free(info); return 0; } @@ -1314,8 +1303,7 @@ /* FIXME: Check geometry hasn't changed here... */ err = talk_to_backend(dev, info); if (!err) { - if (blkif_vbds++ == 0) - blkif_recover(); + blkif_recover(info); } return err; } @@ -1363,11 +1351,6 @@ { int i; - /* A grant for every ring slot, plus one for the ring itself. */ - if (gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1, - &gref_head, &gref_terminal) < 0) - return 1; - if ( (xen_start_info.flags & SIF_INITDOMAIN) || (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) return 0; @@ -1391,6 +1374,6 @@ { int i; for ( i = 0; i < s->req.nr_segments; i++ ) - gnttab_release_grant_reference( - &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i])); -} + gnttab_free_grant_reference( + blkif_gref_from_fas(s->req.frame_and_sects[i])); +} diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkfront/block.h --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Tue Aug 23 18:27:22 2005 @@ -46,6 +46,7 @@ #include <linux/major.h> #include <linux/devfs_fs_kernel.h> #include <asm-xen/hypervisor.h> +#include <asm-xen/xenbus.h> #include <asm-xen/xen-public/xen.h> #include <asm-xen/xen-public/io/blkif.h> #include <asm-xen/xen-public/io/ring.h> @@ -79,11 +80,20 @@ #define DPRINTK_IOCTL(_f, _a...) 
((void)0) #endif -struct xlbd_type_info { - int partn_shift; - int disks_per_major; - char *devname; - char *diskname; +struct xlbd_type_info +{ + int partn_shift; + int disks_per_major; + char *devname; + char *diskname; +}; + +struct xlbd_major_info +{ + int major; + int index; + int usage; + struct xlbd_type_info *type; }; /* @@ -91,26 +101,27 @@ * hang in private_data off the gendisk structure. We may end up * putting all kinds of interesting stuff here :-) */ -struct xlbd_major_info { - int major; - int index; - int usage; - struct xlbd_type_info *type; +struct blkfront_info +{ + struct xenbus_device *xbdev; + /* We watch the backend */ + struct xenbus_watch watch; + dev_t dev; + int vdevice; + blkif_vdev_t handle; + int connected; + char *backend; + int backend_id; + int grant_id; + blkif_front_ring_t ring; + unsigned int evtchn; + struct xlbd_major_info *mi; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + request_queue_t *rq; +#endif + struct work_struct work; + struct gnttab_free_callback callback; }; - -struct xlbd_disk_info { - int xd_device; - blkif_vdev_t handle; - struct xlbd_major_info *mi; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - struct xlbd_disk_info *next_waiting; - request_queue_t *rq; -#endif -}; - -typedef struct xen_block { - int usage; -} xen_block_t; extern spinlock_t blkif_io_lock; @@ -123,7 +134,7 @@ extern void do_blkif_request (request_queue_t *rq); /* Virtual block-device subsystem. 
*/ -int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle, - u16 info, u16 sector_size); -void xlvbd_del(blkif_vdev_t handle); +int xlvbd_add(blkif_sector_t capacity, int device, + u16 vdisk_info, u16 sector_size, struct blkfront_info *info); +void xlvbd_del(struct blkfront_info *info); #endif /* __XEN_DRIVERS_BLOCK_H__ */ diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue Aug 23 18:27:22 2005 @@ -43,325 +43,269 @@ #define NUM_SCSI_MAJORS 9 #define NUM_VBD_MAJORS 1 -struct lvdisk -{ - blkif_sector_t capacity; /* 0: Size in terms of 512-byte sectors. */ - blkif_vdev_t handle; /* 8: Device number (opaque 16 bit value). */ - u16 info; - dev_t dev; - struct list_head list; +static struct xlbd_type_info xlbd_ide_type = { + .partn_shift = 6, + .disks_per_major = 2, + .devname = "ide", + .diskname = "hd", }; -static struct xlbd_type_info xlbd_ide_type = { - .partn_shift = 6, - .disks_per_major = 2, - .devname = "ide", - .diskname = "hd", +static struct xlbd_type_info xlbd_scsi_type = { + .partn_shift = 4, + .disks_per_major = 16, + .devname = "sd", + .diskname = "sd", }; -static struct xlbd_type_info xlbd_scsi_type = { - .partn_shift = 4, - .disks_per_major = 16, - .devname = "sd", - .diskname = "sd", +static struct xlbd_type_info xlbd_vbd_type = { + .partn_shift = 4, + .disks_per_major = 16, + .devname = "xvd", + .diskname = "xvd", }; -static struct xlbd_type_info xlbd_vbd_type = { - .partn_shift = 4, - .disks_per_major = 16, - .devname = "xvd", - .diskname = "xvd", -}; - static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS + - NUM_VBD_MAJORS]; - -#define XLBD_MAJOR_IDE_START 0 -#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS) -#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) - -#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... 
XLBD_MAJOR_SCSI_START - 1 -#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1 -#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1 + NUM_VBD_MAJORS]; + +#define XLBD_MAJOR_IDE_START 0 +#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS) +#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) + +#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1 +#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1 +#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1 /* Information about our VBDs. */ #define MAX_VBDS 64 static LIST_HEAD(vbds_list); -#define MAJOR_XEN(dev) ((dev)>>8) -#define MINOR_XEN(dev) ((dev) & 0xff) - -static struct block_device_operations xlvbd_block_fops = -{ - .owner = THIS_MODULE, - .open = blkif_open, - .release = blkif_release, - .ioctl = blkif_ioctl, +static struct block_device_operations xlvbd_block_fops = +{ + .owner = THIS_MODULE, + .open = blkif_open, + .release = blkif_release, + .ioctl = blkif_ioctl, }; spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED; -static struct lvdisk *xlvbd_device_alloc(void) -{ - struct lvdisk *disk; - - disk = kmalloc(sizeof(*disk), GFP_KERNEL); - if (disk != NULL) { - memset(disk, 0, sizeof(*disk)); - INIT_LIST_HEAD(&disk->list); - } - return disk; -} - -static void xlvbd_device_free(struct lvdisk *disk) -{ - list_del(&disk->list); - kfree(disk); -} - -static struct xlbd_major_info *xlbd_alloc_major_info( - int major, int minor, int index) -{ - struct xlbd_major_info *ptr; - - ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL); - if (ptr == NULL) - return NULL; - - memset(ptr, 0, sizeof(struct xlbd_major_info)); - - ptr->major = major; - - switch (index) { - case XLBD_MAJOR_IDE_RANGE: - ptr->type = &xlbd_ide_type; - ptr->index = index - XLBD_MAJOR_IDE_START; - break; - case XLBD_MAJOR_SCSI_RANGE: - ptr->type = &xlbd_scsi_type; - ptr->index = 
index - XLBD_MAJOR_SCSI_START; - break; - case XLBD_MAJOR_VBD_RANGE: - ptr->type = &xlbd_vbd_type; - ptr->index = index - XLBD_MAJOR_VBD_START; - break; - } - - printk("Registering block device major %i\n", ptr->major); - if (register_blkdev(ptr->major, ptr->type->devname)) { - WPRINTK("can't get major %d with name %s\n", - ptr->major, ptr->type->devname); - kfree(ptr); - return NULL; - } - - devfs_mk_dir(ptr->type->devname); - major_info[index] = ptr; - return ptr; -} - -static struct xlbd_major_info *xlbd_get_major_info(int device) -{ - int major, minor, index; - - major = MAJOR_XEN(device); - minor = MINOR_XEN(device); - - switch (major) { - case IDE0_MAJOR: index = 0; break; - case IDE1_MAJOR: index = 1; break; - case IDE2_MAJOR: index = 2; break; - case IDE3_MAJOR: index = 3; break; - case IDE4_MAJOR: index = 4; break; - case IDE5_MAJOR: index = 5; break; - case IDE6_MAJOR: index = 6; break; - case IDE7_MAJOR: index = 7; break; - case IDE8_MAJOR: index = 8; break; - case IDE9_MAJOR: index = 9; break; - case SCSI_DISK0_MAJOR: index = 10; break; - case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR: - index = 11 + major - SCSI_DISK1_MAJOR; - break; - case SCSI_CDROM_MAJOR: index = 18; break; - default: index = 19; break; - } - - return ((major_info[index] != NULL) ? major_info[index] : - xlbd_alloc_major_info(major, minor, index)); -} - -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) -{ - request_queue_t *rq; - - rq = blk_init_queue(do_blkif_request, &blkif_io_lock); - if (rq == NULL) - return -1; - - elevator_init(rq, "noop"); - - /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_hardsect_size(rq, sector_size); - blk_queue_max_sectors(rq, 512); - - /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); - - /* Ensure a merged request will fit in a single I/O ring slot. 
*/ - blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); - blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); - - /* Make sure buffer addresses are sector-aligned. */ - blk_queue_dma_alignment(rq, 511); - - gd->queue = rq; - - return 0; -} - -static struct gendisk *xlvbd_alloc_gendisk( - struct xlbd_major_info *mi, int minor, blkif_sector_t capacity, - int device, blkif_vdev_t handle, u16 info, u16 sector_size) -{ - struct gendisk *gd; - struct xlbd_disk_info *di; - int nr_minors = 1; - - di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL); - if (di == NULL) - return NULL; - memset(di, 0, sizeof(*di)); - di->mi = mi; - di->xd_device = device; - di->handle = handle; - - if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0) - nr_minors = 1 << mi->type->partn_shift; - - gd = alloc_disk(nr_minors); - if (gd == NULL) - goto out; - - if (nr_minors > 1) - sprintf(gd->disk_name, "%s%c", mi->type->diskname, - 'a' + mi->index * mi->type->disks_per_major + - (minor >> mi->type->partn_shift)); - else - sprintf(gd->disk_name, "%s%c%d", mi->type->diskname, - 'a' + mi->index * mi->type->disks_per_major + - (minor >> mi->type->partn_shift), - minor & ((1 << mi->type->partn_shift) - 1)); - - gd->major = mi->major; - gd->first_minor = minor; - gd->fops = &xlvbd_block_fops; - gd->private_data = di; - set_capacity(gd, capacity); - - if (xlvbd_init_blk_queue(gd, sector_size)) { - del_gendisk(gd); - goto out; - } - - di->rq = gd->queue; - - if (info & VDISK_READONLY) - set_disk_ro(gd, 1); - - if (info & VDISK_REMOVABLE) - gd->flags |= GENHD_FL_REMOVABLE; - - if (info & VDISK_CDROM) - gd->flags |= GENHD_FL_CD; - - add_disk(gd); - - return gd; - -out: - kfree(di); - return NULL; -} - -int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle, - u16 info, u16 sector_size) -{ - struct lvdisk *new; - struct block_device *bd; - struct gendisk *gd; - struct xlbd_major_info *mi; - - mi = xlbd_get_major_info(device); - if (mi == NULL) - return -EPERM; - 
- new = xlvbd_device_alloc(); - if (new == NULL) - return -ENOMEM; - new->capacity = capacity; - new->info = info; - new->handle = handle; - new->dev = MKDEV(MAJOR_XEN(device), MINOR_XEN(device)); - - bd = bdget(new->dev); - if (bd == NULL) - goto out; - - gd = xlvbd_alloc_gendisk(mi, MINOR_XEN(device), capacity, device, handle, - info, sector_size); - if (gd == NULL) - goto out_bd; - - list_add(&new->list, &vbds_list); -out_bd: - bdput(bd); -out: - return 0; -} - -static int xlvbd_device_del(struct lvdisk *disk) -{ - struct block_device *bd; - struct gendisk *gd; - struct xlbd_disk_info *di; - int ret = 0, unused; - request_queue_t *rq; - - bd = bdget(disk->dev); - if (bd == NULL) - return -1; - - gd = get_gendisk(disk->dev, &unused); - di = gd->private_data; - -#if 0 /* This is wrong: hda and hdb share same major, for example. */ - if (di->mi->usage != 0) { - WPRINTK("disk removal failed: used [dev=%x]\n", disk->dev); - ret = -1; - goto out; - } -#endif - - rq = gd->queue; - del_gendisk(gd); - put_disk(gd); - blk_cleanup_queue(rq); - - xlvbd_device_free(disk); - bdput(bd); - return ret; -} - -void xlvbd_del(blkif_vdev_t handle) -{ - struct lvdisk *i; - - list_for_each_entry(i, &vbds_list, list) { - if (i->handle == handle) { - xlvbd_device_del(i); - return; - } +static struct xlbd_major_info * +xlbd_alloc_major_info(int major, int minor, int index) +{ + struct xlbd_major_info *ptr; + + ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL); + if (ptr == NULL) + return NULL; + + memset(ptr, 0, sizeof(struct xlbd_major_info)); + + ptr->major = major; + + switch (index) { + case XLBD_MAJOR_IDE_RANGE: + ptr->type = &xlbd_ide_type; + ptr->index = index - XLBD_MAJOR_IDE_START; + break; + case XLBD_MAJOR_SCSI_RANGE: + ptr->type = &xlbd_scsi_type; + ptr->index = index - XLBD_MAJOR_SCSI_START; + break; + case XLBD_MAJOR_VBD_RANGE: + ptr->type = &xlbd_vbd_type; + ptr->index = index - XLBD_MAJOR_VBD_START; + break; } - BUG(); -} + + printk("Registering block device major 
%i\n", ptr->major); + if (register_blkdev(ptr->major, ptr->type->devname)) { + WPRINTK("can't get major %d with name %s\n", + ptr->major, ptr->type->devname); + kfree(ptr); + return NULL; + } + + devfs_mk_dir(ptr->type->devname); + major_info[index] = ptr; + return ptr; +} + +static struct xlbd_major_info * +xlbd_get_major_info(int vdevice) +{ + struct xlbd_major_info *mi; + int major, minor, index; + + major = BLKIF_MAJOR(vdevice); + minor = BLKIF_MINOR(vdevice); + + switch (major) { + case IDE0_MAJOR: index = 0; break; + case IDE1_MAJOR: index = 1; break; + case IDE2_MAJOR: index = 2; break; + case IDE3_MAJOR: index = 3; break; + case IDE4_MAJOR: index = 4; break; + case IDE5_MAJOR: index = 5; break; + case IDE6_MAJOR: index = 6; break; + case IDE7_MAJOR: index = 7; break; + case IDE8_MAJOR: index = 8; break; + case IDE9_MAJOR: index = 9; break; + case SCSI_DISK0_MAJOR: index = 10; break; + case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR: + index = 11 + major - SCSI_DISK1_MAJOR; + break; + case SCSI_CDROM_MAJOR: index = 18; break; + default: index = 19; break; + } + + mi = ((major_info[index] != NULL) ? major_info[index] : + xlbd_alloc_major_info(major, minor, index)); + mi->usage++; + return mi; +} + +static void +xlbd_put_major_info(struct xlbd_major_info *mi) +{ + mi->usage--; + /* XXX: release major if 0 */ +} + +static int +xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) +{ + request_queue_t *rq; + + rq = blk_init_queue(do_blkif_request, &blkif_io_lock); + if (rq == NULL) + return -1; + + elevator_init(rq, "noop"); + + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_hardsect_size(rq, sector_size); + blk_queue_max_sectors(rq, 512); + + /* Each segment in a request is up to an aligned page in size. */ + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); + blk_queue_max_segment_size(rq, PAGE_SIZE); + + /* Ensure a merged request will fit in a single I/O ring slot. 
*/ + blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + + /* Make sure buffer addresses are sector-aligned. */ + blk_queue_dma_alignment(rq, 511); + + gd->queue = rq; + + return 0; +} + +static int +xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice, + u16 vdisk_info, u16 sector_size, + struct blkfront_info *info) +{ + struct gendisk *gd; + struct xlbd_major_info *mi; + int nr_minors = 1; + int err = -ENODEV; + + mi = xlbd_get_major_info(vdevice); + if (mi == NULL) + goto out; + info->mi = mi; + + if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0) + nr_minors = 1 << mi->type->partn_shift; + + gd = alloc_disk(nr_minors); + if (gd == NULL) + goto out; + + if (nr_minors > 1) + sprintf(gd->disk_name, "%s%c", mi->type->diskname, + 'a' + mi->index * mi->type->disks_per_major + + (minor >> mi->type->partn_shift)); + else + sprintf(gd->disk_name, "%s%c%d", mi->type->diskname, + 'a' + mi->index * mi->type->disks_per_major + + (minor >> mi->type->partn_shift), + minor & ((1 << mi->type->partn_shift) - 1)); + + gd->major = mi->major; + gd->first_minor = minor; + gd->fops = &xlvbd_block_fops; + gd->private_data = info; + set_capacity(gd, capacity); + + if (xlvbd_init_blk_queue(gd, sector_size)) { + del_gendisk(gd); + goto out; + } + + info->rq = gd->queue; + + if (vdisk_info & VDISK_READONLY) + set_disk_ro(gd, 1); + + if (vdisk_info & VDISK_REMOVABLE) + gd->flags |= GENHD_FL_REMOVABLE; + + if (vdisk_info & VDISK_CDROM) + gd->flags |= GENHD_FL_CD; + + add_disk(gd); + + return 0; + + out: + if (mi) + xlbd_put_major_info(mi); + return err; +} + +int +xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info, + u16 sector_size, struct blkfront_info *info) +{ + struct block_device *bd; + int err = 0; + + info->dev = MKDEV(BLKIF_MAJOR(vdevice), BLKIF_MINOR(vdevice)); + + bd = bdget(info->dev); + if (bd == NULL) + return -ENODEV; + + err = 
xlvbd_alloc_gendisk(BLKIF_MINOR(vdevice), capacity, vdevice, + vdisk_info, sector_size, info); + + bdput(bd); + return err; +} + +void +xlvbd_del(struct blkfront_info *info) +{ + struct block_device *bd; + struct gendisk *gd; + int unused; + request_queue_t *rq; + + bd = bdget(info->dev); + if (bd == NULL) + return; + + gd = get_gendisk(info->dev, &unused); + rq = gd->queue; + + del_gendisk(gd); + put_disk(gd); + xlbd_put_major_info(info->mi); + info->mi = NULL; + blk_cleanup_queue(rq); + + bdput(bd); +} diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Aug 23 18:27:22 2005 @@ -102,12 +102,12 @@ #endif #ifdef CONFIG_XEN_NETDEV_GRANT_TX -static grant_ref_t gref_tx_head, gref_tx_terminal; +static grant_ref_t gref_tx_head; static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1]; #endif #ifdef CONFIG_XEN_NETDEV_GRANT_RX -static grant_ref_t gref_rx_head, gref_rx_terminal; +static grant_ref_t gref_rx_head; static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1]; #endif @@ -441,8 +441,8 @@ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id; #ifdef CONFIG_XEN_NETDEV_GRANT_RX - if (unlikely((ref = gnttab_claim_grant_reference(&gref_rx_head, - gref_rx_terminal)) < 0)) { + ref = gnttab_claim_grant_reference(&gref_rx_head); + if (unlikely(ref < 0)) { printk(KERN_ALERT "#### netfront can't claim rx reference\n"); BUG(); } @@ -537,8 +537,8 @@ tx->id = id; #ifdef CONFIG_XEN_NETDEV_GRANT_TX - if (unlikely((ref = gnttab_claim_grant_reference(&gref_tx_head, - gref_tx_terminal)) < 0)) { + ref = gnttab_claim_grant_reference(&gref_tx_head); + if (unlikely(ref < 0)) { printk(KERN_ALERT "#### netfront can't claim tx grant reference\n"); BUG(); } @@ -929,8 +929,7 @@ msg->handle = np->handle; msg->tx_shmem_frame = virt_to_mfn(np->tx); #ifdef CONFIG_XEN_NETDEV_GRANT_TX - msg->tx_shmem_ref = 
(u32)gnttab_claim_grant_reference(&gref_tx_head, - gref_tx_terminal); + msg->tx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_tx_head); if(msg->tx_shmem_ref < 0) { printk(KERN_ALERT "#### netfront can't claim tx_shmem reference\n"); BUG(); @@ -941,8 +940,7 @@ msg->rx_shmem_frame = virt_to_mfn(np->rx); #ifdef CONFIG_XEN_NETDEV_GRANT_RX - msg->rx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_rx_head, - gref_rx_terminal); + msg->rx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_rx_head); if(msg->rx_shmem_ref < 0) { printk(KERN_ALERT "#### netfront can't claim rx_shmem reference\n"); BUG(); @@ -1420,7 +1418,7 @@ #ifdef CONFIG_XEN_NETDEV_GRANT_TX /* A grant for every ring slot, plus one for the ring itself */ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE + 1, - &gref_tx_head, &gref_tx_terminal) < 0) { + &gref_tx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); return 1; } @@ -1429,7 +1427,7 @@ #ifdef CONFIG_XEN_NETDEV_GRANT_RX /* A grant for every ring slot, plus one for the ring itself */ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE + 1, - &gref_rx_head, &gref_rx_terminal) < 0) { + &gref_rx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); return 1; } @@ -1457,10 +1455,10 @@ static void netif_exit(void) { #ifdef CONFIG_XEN_NETDEV_GRANT_TX - gnttab_free_grant_references(NETIF_TX_RING_SIZE + 1, gref_tx_head); + gnttab_free_grant_references(gref_tx_head); #endif #ifdef CONFIG_XEN_NETDEV_GRANT_RX - gnttab_free_grant_references(NETIF_RX_RING_SIZE + 1, gref_rx_head); + gnttab_free_grant_references(gref_rx_head); #endif } diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue Aug 23 18:27:22 2005 @@ -167,7 +167,7 @@ if (ret) goto batch_err; - u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot); + u.val = 
pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot)); u.ptr = ptep; if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) ) diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Tue Aug 23 18:27:22 2005 @@ -60,9 +60,13 @@ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +#define INVALID_P2M_ENTRY (~0U) +#define FOREIGN_FRAME(m) ((m) | 0x80000000U) extern unsigned int *phys_to_machine_mapping; -#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)])) -#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)])) +#define pfn_to_mfn(pfn) \ +((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL) +#define mfn_to_pfn(mfn) \ +((unsigned long)machine_to_phys_mapping[(unsigned int)(mfn)]) /* Definitions for machine and pseudophysical addresses. */ #ifdef CONFIG_X86_PAE diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Tue Aug 23 18:27:22 2005 @@ -63,17 +63,15 @@ * * NB2. When deliberately mapping foreign pages into the p2m table, you *must* * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we - * require. In all the cases we care about, the high bit gets shifted out - * (e.g., phys_to_machine()) so behaviour there is correct. + * require. In all the cases we care about, the FOREIGN_FRAME bit is + * masked (e.g., pfn_to_mfn()) so behaviour there is correct. 
*/ -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) #define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) #define pte_pfn(_pte) \ ({ \ unsigned long mfn = pte_mfn(_pte); \ unsigned long pfn = mfn_to_pfn(mfn); \ - if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \ + if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\ pfn = max_mapnr; /* special: force !pfn_valid() */ \ pfn; \ }) diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Tue Aug 23 18:27:22 2005 @@ -150,15 +150,13 @@ return !pte.pte_low && !pte.pte_high; } -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) #define pte_mfn(_pte) ( ((_pte).pte_low >> PAGE_SHIFT) |\ (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)) ) #define pte_pfn(_pte) \ ({ \ unsigned long mfn = pte_mfn(_pte); \ unsigned long pfn = mfn_to_pfn(mfn); \ - if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \ + if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\ pfn = max_mapnr; /* special: force !pfn_valid() */ \ pfn; \ }) diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Tue Aug 23 18:27:22 2005 @@ -62,9 +62,13 @@ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +#define INVALID_P2M_ENTRY (~0U) +#define FOREIGN_FRAME(m) ((m) | 0x80000000U) extern u32 *phys_to_machine_mapping; -#define pfn_to_mfn(_pfn) ((unsigned long) phys_to_machine_mapping[(unsigned int)(_pfn)]) -#define mfn_to_pfn(_mfn) ((unsigned long) machine_to_phys_mapping[(unsigned int)(_mfn)]) +#define 
pfn_to_mfn(pfn) \ +((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL) +#define mfn_to_pfn(mfn) \ +((unsigned long)machine_to_phys_mapping[(unsigned int)(mfn)]) /* Definitions for machine and pseudophysical addresses. */ typedef unsigned long paddr_t; diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Tue Aug 23 18:27:22 2005 @@ -300,17 +300,15 @@ * * NB2. When deliberately mapping foreign pages into the p2m table, you *must* * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we - * require. In all the cases we care about, the high bit gets shifted out - * (e.g., phys_to_machine()) so behaviour there is correct. - */ -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) + * require. In all the cases we care about, the FOREIGN_FRAME bit is + * masked (e.g., pfn_to_mfn()) so behaviour there is correct. 
+ */ #define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT) #define pte_pfn(_pte) \ ({ \ unsigned long mfn = pte_mfn(_pte); \ unsigned pfn = mfn_to_pfn(mfn); \ - if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \ + if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\ pfn = max_mapnr; /* special: force !pfn_valid() */ \ pfn; \ }) diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/include/asm-xen/gnttab.h --- a/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Tue Aug 23 18:25:51 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Tue Aug 23 18:27:22 2005 @@ -19,54 +19,46 @@ /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ #define NR_GRANT_FRAMES 4 -#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) -int -gnttab_grant_foreign_access( - domid_t domid, unsigned long frame, int readonly); +struct gnttab_free_callback { + struct gnttab_free_callback *next; + void (*fn)(void *); + void *arg; + u16 count; +}; -void -gnttab_end_foreign_access( - grant_ref_t ref, int readonly); +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, + int readonly); -int -gnttab_grant_foreign_transfer( - domid_t domid, unsigned long pfn); +void gnttab_end_foreign_access(grant_ref_t ref, int readonly); -unsigned long -gnttab_end_foreign_transfer( - grant_ref_t ref); +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); -int -gnttab_query_foreign_access( - grant_ref_t ref ); +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); + +int gnttab_query_foreign_access(grant_ref_t ref); /* * operations on reserved batches of grant references */ -int -gnttab_alloc_grant_references( - u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal ); +int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head); -void -gnttab_free_grant_references( - u16 count, grant_ref_t private_head ); +void gnttab_free_grant_reference(grant_ref_t ref); -int 
-gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal -); +void gnttab_free_grant_references(grant_ref_t head); -void -gnttab_release_grant_reference( - grant_ref_t *private_head, grant_ref_t release ); +int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); -void -gnttab_grant_foreign_access_ref( - grant_ref_t ref, domid_t domid, unsigned long frame, int readonly); +void gnttab_release_grant_reference(grant_ref_t *private_head, + grant_ref_t release); -void -gnttab_grant_foreign_transfer_ref( - grant_ref_t, domid_t domid, unsigned long pfn); +void gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, u16 count); +void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly); + +void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, + unsigned long pfn); #endif /* __ASM_GNTTAB_H__ */ diff -r 6783e59e1c45 -r 522bc50588ed tools/xenstat/xentop/Makefile --- a/tools/xenstat/xentop/Makefile Tue Aug 23 18:25:51 2005 +++ b/tools/xenstat/xentop/Makefile Tue Aug 23 18:27:22 2005 @@ -28,7 +28,7 @@ CFLAGS += -DGCC_PRINTF -Wall -Werror -I$(XEN_LIBXENSTAT) LDFLAGS += -L$(XEN_LIBXENSTAT) -LDLIBS += -lxenstat -lcurses +LDLIBS += -lxenstat -lncurses all: xentop diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Tue Aug 23 18:25:51 2005 +++ b/xen/arch/x86/io_apic.c Tue Aug 23 18:27:22 2005 @@ -1751,8 +1751,30 @@ pin = (address - 0x10) >> 1; + *(u32 *)&rte = val; rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - *(int *)&rte = val; + + /* + * What about weird destination types? + * SMI: Ignore? Ought to be set up by the BIOS. + * NMI: Ignore? Watchdog functionality is Xen's concern. + * INIT: Definitely ignore: probably a guest OS bug. + * ExtINT: Ignore? Linux only asserts this at start of day. + * For now, print a message and return an error. We can fix up on demand. 
+ */ + if ( rte.delivery_mode > dest_LowestPrio ) + { + printk("ERROR: Attempt to write weird IOAPIC destination mode!\n"); + printk(" APIC=%d/%d, lo-reg=%x\n", apicid, pin, val); + return -EINVAL; + } + + /* + * The guest does not know physical APIC arrangement (flat vs. cluster). + * Apply genapic conventions for this platform. + */ + rte.delivery_mode = INT_DELIVERY_MODE; + rte.dest_mode = INT_DEST_MODE; if ( rte.vector >= FIRST_DEVICE_VECTOR ) { diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Aug 23 18:25:51 2005 +++ b/xen/arch/x86/mm.c Tue Aug 23 18:27:22 2005 @@ -444,7 +444,7 @@ if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) ) { - MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK); + MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK); return 0; } @@ -490,7 +490,7 @@ if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) ) { - MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK); + MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK); return 0; } @@ -523,7 +523,7 @@ if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) ) { - MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK); + MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK); return 0; } @@ -557,7 +557,7 @@ if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) ) { - MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK); + MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK); return 0; } @@ -1025,7 +1025,7 @@ unlikely(o != l1e_get_intpte(ol1e)) ) { MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte - ": saw %" PRIpte "\n", + ": saw %" PRIpte, l1e_get_intpte(ol1e), l1e_get_intpte(nl1e), o); @@ -1051,7 +1051,7 @@ { if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) ) { - MEM_LOG("Bad L1 flags %x\n", + MEM_LOG("Bad L1 flags %x", l1e_get_flags(nl1e) & L1_DISALLOW_MASK); return 0; } @@ -1113,7 +1113,7 @@ { if ( unlikely(l2e_get_flags(nl2e) & 
L2_DISALLOW_MASK) ) { - MEM_LOG("Bad L2 flags %x\n", + MEM_LOG("Bad L2 flags %x", l2e_get_flags(nl2e) & L2_DISALLOW_MASK); return 0; } @@ -1175,7 +1175,7 @@ { if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) ) { - MEM_LOG("Bad L3 flags %x\n", + MEM_LOG("Bad L3 flags %x", l3e_get_flags(nl3e) & L3_DISALLOW_MASK); return 0; } @@ -1237,7 +1237,7 @@ { if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) ) { - MEM_LOG("Bad L4 flags %x\n", + MEM_LOG("Bad L4 flags %x", l4e_get_flags(nl4e) & L4_DISALLOW_MASK); return 0; } @@ -1598,7 +1598,7 @@ percpu_info[cpu].foreign = dom_io; break; default: - MEM_LOG("Dom %u cannot set foreign dom\n", d->domain_id); + MEM_LOG("Dom %u cannot set foreign dom", d->domain_id); okay = 0; break; } @@ -1831,7 +1831,7 @@ case MMUEXT_FLUSH_CACHE: if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) ) { - MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n"); + MEM_LOG("Non-physdev domain tried to FLUSH_CACHE."); okay = 0; } else @@ -1845,7 +1845,7 @@ if ( shadow_mode_external(d) ) { MEM_LOG("ignoring SET_LDT hypercall from external " - "domain %u\n", d->domain_id); + "domain %u", d->domain_id); okay = 0; break; } @@ -1916,7 +1916,7 @@ unlikely(IS_XEN_HEAP_FRAME(page)) ) { MEM_LOG("Transferee has no reservation headroom (%d,%d), or " - "page is in Xen heap (%lx), or dom is dying (%ld).\n", + "page is in Xen heap (%lx), or dom is dying (%ld).", e->tot_pages, e->max_pages, op.mfn, e->domain_flags); okay = 0; goto reassign_fail; @@ -1937,7 +1937,7 @@ unlikely(_nd != _d) ) { MEM_LOG("Bad page values %lx: ed=%p(%u), sd=%p," - " caf=%08x, taf=%" PRtype_info "\n", + " caf=%08x, taf=%" PRtype_info, page_to_pfn(page), d, d->domain_id, unpickle_domptr(_nd), x, page->u.inuse.type_info); okay = 0; @@ -2301,7 +2301,7 @@ if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) ) { - DPRINTK("Grant map attempted to update a non-L1 page\n"); + MEM_LOG("Grant map attempted to update a non-L1 page"); rc = 
GNTST_general_error; goto failed; } @@ -2363,7 +2363,7 @@ if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) ) { - DPRINTK("Grant map attempted to update a non-L1 page\n"); + MEM_LOG("Grant map attempted to update a non-L1 page"); rc = GNTST_general_error; goto failed; } @@ -2378,7 +2378,7 @@ /* Check that the virtual address supplied is actually mapped to frame. */ if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) ) { - DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n", + MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx", (unsigned long)l1e_get_intpte(ol1e), addr, frame); put_page_type(page); rc = GNTST_general_error; @@ -2388,7 +2388,7 @@ /* Delete pagetable entry. */ if ( unlikely(__put_user(0, (intpte_t *)va))) { - DPRINTK("Cannot delete PTE entry at %p.\n", va); + MEM_LOG("Cannot delete PTE entry at %p", va); put_page_type(page); rc = GNTST_general_error; goto failed; @@ -2452,7 +2452,7 @@ if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) ) { - DPRINTK("Could not find PTE entry for address %lx\n", addr); + MEM_LOG("Could not find PTE entry for address %lx", addr); return GNTST_general_error; } @@ -2462,7 +2462,7 @@ */ if ( unlikely(l1e_get_pfn(ol1e) != frame) ) { - DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n", + MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx", l1e_get_pfn(ol1e), addr, frame); return GNTST_general_error; } @@ -2470,7 +2470,7 @@ /* Delete pagetable entry. 
*/ if ( unlikely(__put_user(0, &pl1e->l1)) ) { - DPRINTK("Cannot delete PTE entry at %p.\n", (unsigned long *)pl1e); + MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); return GNTST_general_error; } @@ -2930,7 +2930,7 @@ if ( unlikely(!get_page_from_l1e(nl1e, d)) ) { - MEM_LOG("ptwr: Could not re-validate l1 page\n"); + MEM_LOG("ptwr: Could not re-validate l1 page"); /* * Make the remaining p.t's consistent before crashing, so the * reference counts are correct. @@ -3056,7 +3056,7 @@ /* Aligned access only, thank you. */ if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) ) { - MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)\n", + MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)", bytes, addr); return X86EMUL_UNHANDLEABLE; } @@ -3089,7 +3089,7 @@ if (__copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)], sizeof(pte))) { - MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n"); + MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table"); return X86EMUL_UNHANDLEABLE; } @@ -3102,7 +3102,7 @@ (page_get_owner(page) != d) ) { MEM_LOG("ptwr_emulate: Page is mistyped or bad pte " - "(%lx, %" PRtype_info ")\n", + "(%lx, %" PRtype_info ")", l1e_get_pfn(pte), page->u.inuse.type_info); return X86EMUL_UNHANDLEABLE; } diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/vmx.c --- a/xen/arch/x86/vmx.c Tue Aug 23 18:25:51 2005 +++ b/xen/arch/x86/vmx.c Tue Aug 23 18:27:22 2005 @@ -1712,9 +1712,6 @@ default: __vmx_bug(&regs); /* should not happen */ } - - vmx_intr_assist(v); - return; } asmlinkage void load_cr2(void) diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/vmx_io.c --- a/xen/arch/x86/vmx_io.c Tue Aug 23 18:25:51 2005 +++ b/xen/arch/x86/vmx_io.c Tue Aug 23 18:27:22 2005 @@ -631,12 +631,14 @@ return ((eflags & X86_EFLAGS_IF) == 0); } -void vmx_intr_assist(struct vcpu *v) +asmlinkage void vmx_intr_assist(void) { int intr_type = 0; - int highest_vector = find_highest_pending_irq(v, &intr_type); + int 
highest_vector; unsigned long intr_fields, eflags, interruptibility, cpu_exec_control; - + struct vcpu *v = current; + + highest_vector = find_highest_pending_irq(v, &intr_type); __vmread(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control); if (highest_vector == -1) { @@ -712,9 +714,6 @@ /* We can't resume the guest if we're waiting on I/O */ ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)); - - /* We always check for interrupts before resuming guest */ - vmx_intr_assist(d); } #endif /* CONFIG_VMX */ diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Tue Aug 23 18:25:51 2005 +++ b/xen/arch/x86/x86_32/entry.S Tue Aug 23 18:27:22 2005 @@ -140,6 +140,7 @@ jnz 2f /* vmx_restore_all_guest */ + call vmx_intr_assist call load_cr2 .endif VMX_RESTORE_ALL_NOSEGREGS diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Tue Aug 23 18:25:51 2005 +++ b/xen/arch/x86/x86_32/traps.c Tue Aug 23 18:27:22 2005 @@ -1,5 +1,6 @@ #include <xen/config.h> +#include <xen/domain_page.h> #include <xen/init.h> #include <xen/sched.h> #include <xen/lib.h> @@ -86,24 +87,33 @@ void show_page_walk(unsigned long addr) { - l2_pgentry_t pmd; - l1_pgentry_t *pte; - - if ( addr < PAGE_OFFSET ) - return; + unsigned long pfn = read_cr3() >> PAGE_SHIFT; + intpte_t *ptab, ent; printk("Pagetable walk from %08lx:\n", addr); - - pmd = idle_pg_table_l2[l2_linear_offset(addr)]; - printk(" L2 = %"PRIpte" %s\n", l2e_get_intpte(pmd), - (l2e_get_flags(pmd) & _PAGE_PSE) ? 
"(2/4MB)" : ""); - if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) || - (l2e_get_flags(pmd) & _PAGE_PSE) ) - return; - - pte = __va(l2e_get_paddr(pmd)); - pte += l1_table_offset(addr); - printk(" L1 = %"PRIpte"\n", l1e_get_intpte(*pte)); + +#ifdef CONFIG_X86_PAE + ptab = map_domain_page(pfn); + ent = ptab[l3_table_offset(addr)]; + printk(" L3 = %"PRIpte"\n", ent); + unmap_domain_page(ptab); + if ( !(ent & _PAGE_PRESENT) ) + return; + pfn = ent >> PAGE_SHIFT; +#endif + + ptab = map_domain_page(pfn); + ent = ptab[l2_table_offset(addr)]; + printk(" L2 = %"PRIpte" %s\n", ent, (ent & _PAGE_PSE) ? "(PSE)" : ""); + unmap_domain_page(ptab); + if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) ) + return; + pfn = ent >> PAGE_SHIFT; + + ptab = map_domain_page(ent >> PAGE_SHIFT); + ent = ptab[l2_table_offset(addr)]; + printk(" L1 = %"PRIpte"\n", ent); + unmap_domain_page(ptab); } #define DOUBLEFAULT_STACK_SIZE 1024 diff -r 6783e59e1c45 -r 522bc50588ed xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Tue Aug 23 18:25:51 2005 +++ b/xen/arch/x86/x86_64/entry.S Tue Aug 23 18:27:22 2005 @@ -233,6 +233,7 @@ jnz 2f /* vmx_restore_all_guest */ + call vmx_intr_assist call load_cr2 .endif /* diff -r 6783e59e1c45 -r 522bc50588ed xen/include/asm-x86/vmx.h --- a/xen/include/asm-x86/vmx.h Tue Aug 23 18:25:51 2005 +++ b/xen/include/asm-x86/vmx.h Tue Aug 23 18:27:22 2005 @@ -31,7 +31,7 @@ extern void vmx_asm_vmexit_handler(struct cpu_user_regs); extern void vmx_asm_do_resume(void); extern void vmx_asm_do_launch(void); -extern void vmx_intr_assist(struct vcpu *d); +extern void vmx_intr_assist(void); extern void arch_vmx_do_launch(struct vcpu *); extern void arch_vmx_do_resume(struct vcpu *); @@ -355,7 +355,7 @@ } /* Make sure that xen intercepts any FP accesses from current */ -static inline void vmx_stts() +static inline void vmx_stts(void) { unsigned long cr0; diff -r 6783e59e1c45 -r 522bc50588ed xen/include/public/io/blkif.h --- a/xen/include/public/io/blkif.h Tue Aug 23 18:25:51 
2005 +++ b/xen/include/public/io/blkif.h Tue Aug 23 18:27:22 2005 @@ -58,6 +58,9 @@ #define BLKIF_RSP_ERROR -1 /* non-specific 'error' */ #define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */ +#define BLKIF_MAJOR(dev) ((dev)>>8) +#define BLKIF_MINOR(dev) ((dev) & 0xff) + /* * Generate blkif ring structures and types. */ diff -r 6783e59e1c45 -r 522bc50588ed linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c Tue Aug 23 18:25:51 2005 +++ /dev/null Tue Aug 23 18:27:22 2005 @@ -1,499 +0,0 @@ -/* - * arch/x86_64/mm/ioremap.c - * - * Re-map IO memory to kernel address space so that we can access it. - * This is needed for high PCI addresses that aren't mapped in the - * 640k-1MB IO memory area on PC's - * - * (C) Copyright 1995 1996 Linus Torvalds - */ - -#include <linux/vmalloc.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <asm/io.h> -#include <asm/fixmap.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> - -/* - * Reuse arch/xen/i396/mm/ioremap.c. Need to merge later - */ -#ifndef CONFIG_XEN_PHYSDEV_ACCESS - -void * __ioremap(unsigned long phys_addr, unsigned long size, - unsigned long flags) -{ - return NULL; -} - -void *ioremap_nocache (unsigned long phys_addr, unsigned long size) -{ - return NULL; -} - -void iounmap(volatile void __iomem *addr) -{ -} - -void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) -{ - return NULL; -} - -void __init bt_iounmap(void *addr, unsigned long size) -{ -} - -#else - -#if defined(__i386__) -/* - * Does @address reside within a non-highmem page that is local to this virtual - * machine (i.e., not an I/O page, nor a memory page belonging to another VM). - * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand - * why this works. 
- */ -static inline int is_local_lowmem(unsigned long address) -{ - extern unsigned long max_low_pfn; - unsigned long mfn = address >> PAGE_SHIFT; - unsigned long pfn = mfn_to_pfn(mfn); - return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn)); -} -#elif defined(__x86_64__) -/* - * - */ -static inline int is_local_lowmem(unsigned long address) -{ - return 0; -} -#endif - -/* - * Generic mapping function (not visible outside): - */ - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. - * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. - */ -void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) -{ - void __iomem * addr; - struct vm_struct * area; - unsigned long offset, last_addr; - domid_t domid = DOMID_IO; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= 0x0 && last_addr < 0x100000) - return isa_bus_to_virt(phys_addr); -#endif - - /* - * Don't allow anybody to remap normal RAM that we're using.. - */ - if (is_local_lowmem(phys_addr)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = bus_to_virt(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) - return NULL; - - domid = DOMID_LOCAL; - } - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr+1) - phys_addr; - - /* - * Ok, go for it.. 
- */ - area = get_vm_area(size, VM_IOREMAP | (flags << 20)); - if (!area) - return NULL; - area->phys_addr = phys_addr; - addr = (void __iomem *) area->addr; - if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr, - size, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_DIRTY | _PAGE_ACCESSED -#if defined(__x86_64__) - | _PAGE_USER -#endif - | flags), domid)) { - vunmap((void __force *) addr); - return NULL; - } - return (void __iomem *) (offset + (char __iomem *)addr); -} - - -/** - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In particular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - * - * Must be freed with iounmap. - */ - -void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) -{ - unsigned long last_addr; - void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD); - if (!p) - return p; - - /* Guaranteed to be > phys_addr, as per __ioremap() */ - last_addr = phys_addr + size - 1; - - if (is_local_lowmem(last_addr)) { - struct page *ppage = virt_to_page(bus_to_virt(phys_addr)); - unsigned long npages; - - phys_addr &= PAGE_MASK; - - /* This might overflow and become zero.. */ - last_addr = PAGE_ALIGN(last_addr); - - /* .. 
but that's ok, because modulo-2**n arithmetic will make - * the page-aligned "last - first" come out right. - */ - npages = (last_addr - phys_addr) >> PAGE_SHIFT; - - if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { - iounmap(p); - p = NULL; - } - global_flush_tlb(); - } - - return p; -} - -void iounmap(volatile void __iomem *addr) -{ - struct vm_struct *p; - if ((void __force *) addr <= high_memory) - return; -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) - return; -#endif - p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); - if (!p) { - printk("__iounmap: bad address %p\n", addr); - return; - } - - if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) { - /* p->size includes the guard page, but cpa doesn't like that */ - change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)), - (p->size - PAGE_SIZE) >> PAGE_SHIFT, - PAGE_KERNEL); - global_flush_tlb(); - } - kfree(p); -} - -#if defined(__i386__) -void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) -{ - unsigned long offset, last_addr; - unsigned int nrpages; - enum fixed_addresses idx; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= 0x0 && last_addr < 0x100000) - return isa_bus_to_virt(phys_addr); -#endif - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. - */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) - return NULL; - - /* - * Ok, go for it.. 
- */ - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - set_fixmap(idx, phys_addr); - phys_addr += PAGE_SIZE; - --idx; - --nrpages; - } - return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); -} - -void __init bt_iounmap(void *addr, unsigned long size) -{ - unsigned long virt_addr; - unsigned long offset; - unsigned int nrpages; - enum fixed_addresses idx; - - virt_addr = (unsigned long)addr; - if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) - return; -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) - return; -#endif - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; - - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - clear_fixmap(idx); - --idx; - --nrpages; - } -} -#endif /* defined(__i386__) */ - -#endif /* CONFIG_XEN_PHYSDEV_ACCESS */ - -/* These hacky macros avoid phys->machine translations. */ -#define __direct_pte(x) ((pte_t) { (x) } ) -#define __direct_mk_pte(page_nr,pgprot) \ - __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) -#define direct_mk_pte_phys(physpage, pgprot) \ - __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) - -static inline void direct_remap_area_pte(pte_t *pte, - unsigned long address, - unsigned long size, - mmu_update_t **v) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); - - do { - (*v)->ptr = virt_to_machine(pte); - (*v)++; - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline int direct_remap_area_pmd(struct mm_struct *mm, - pmd_t *pmd, - unsigned long address, - unsigned long size, - mmu_update_t **v) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - if (address >= end) - BUG(); - do { - pte_t *pte = (mm == &init_mm) ? 
- pte_alloc_kernel(mm, pmd, address) : - pte_alloc_map(mm, pmd, address); - if (!pte) - return -ENOMEM; - direct_remap_area_pte(pte, address, end - address, v); - pte_unmap(pte); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -int __direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long size, - mmu_update_t *v) -{ - pgd_t * dir; - unsigned long end = address + size; - int error; - -#if defined(__i386__) - dir = pgd_offset(mm, address); -#elif defined (__x86_64) - dir = (mm == &init_mm) ? - pgd_offset_k(address): - pgd_offset(mm, address); -#endif - if (address >= end) - BUG(); - spin_lock(&mm->page_table_lock); - do { - pud_t *pud; - pmd_t *pmd; - - error = -ENOMEM; - pud = pud_alloc(mm, dir, address); - if (!pud) - break; - pmd = pmd_alloc(mm, pud, address); - if (!pmd) - break; - error = 0; - direct_remap_area_pmd(mm, pmd, address, end - address, &v); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - - } while (address && (address < end)); - spin_unlock(&mm->page_table_lock); - return error; -} - - -int direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long machine_addr, - unsigned long size, - pgprot_t prot, - domid_t domid) -{ - int i; - unsigned long start_address; -#define MAX_DIRECTMAP_MMU_QUEUE 130 - mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u; - - start_address = address; - - flush_cache_all(); - - for (i = 0; i < size; i += PAGE_SIZE) { - if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) { - /* Fill in the PTE pointers. */ - __direct_remap_area_pages(mm, - start_address, - address-start_address, - u); - - if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) - return -EFAULT; - v = u; - start_address = address; - } - - /* - * Fill in the machine address: PTE ptr is done later by - * __direct_remap_area_pages(). 
- */ - v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot); - - machine_addr += PAGE_SIZE; - address += PAGE_SIZE; - v++; - } - - if (v != u) { - /* get the ptep's filled in */ - __direct_remap_area_pages(mm, - start_address, - address-start_address, - u); - if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)) - return -EFAULT; - } - - flush_tlb_all(); - - return 0; -} - -EXPORT_SYMBOL(direct_remap_area_pages); - -static int lookup_pte_fn( - pte_t *pte, struct page *pte_page, unsigned long addr, void *data) -{ - unsigned long *ptep = (unsigned long *)data; - if (ptep) *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT) - | ((unsigned long)pte & ~PAGE_MASK); - return 0; -} - -int create_lookup_pte_addr(struct mm_struct *mm, - unsigned long address, - unsigned long *ptep) -{ - return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep); -} - -EXPORT_SYMBOL(create_lookup_pte_addr); - -static int noop_fn( - pte_t *pte, struct page *pte_page, unsigned long addr, void *data) -{ - return 0; -} - -int touch_pte_range(struct mm_struct *mm, - unsigned long address, - unsigned long size) -{ - return generic_page_range(mm, address, size, noop_fn, NULL); -} - -EXPORT_SYMBOL(touch_pte_range); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |