[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merge.
# HG changeset patch # User adsharma@xxxxxxxxxxxxxxxxxxxx # Node ID cc5f88b719d038555e62460bbdf9d38e13b953ac # Parent cd984b3478f6403ad37ae2b72a246169de337b5c # Parent 522bc50588eda1c0bba0562a16fe8edd1a715f09 Merge. diff -r cd984b3478f6 -r cc5f88b719d0 .hgignore --- a/.hgignore Mon Aug 22 18:37:48 2005 +++ b/.hgignore Tue Aug 23 19:03:21 2005 @@ -147,6 +147,7 @@ ^tools/xcs/xcsdump$ ^tools/xcutils/xc_restore$ ^tools/xcutils/xc_save$ +^tools/xenstat/xentop/xentop$ ^tools/xenstore/testsuite/tmp/.*$ ^tools/xenstore/xen$ ^tools/xenstore/xenstored$ diff -r cd984b3478f6 -r cc5f88b719d0 Config.mk --- a/Config.mk Mon Aug 22 18:37:48 2005 +++ b/Config.mk Tue Aug 23 19:03:21 2005 @@ -14,6 +14,7 @@ CC = $(CROSS_COMPILE)gcc CPP = $(CROSS_COMPILE)gcc -E AR = $(CROSS_COMPILE)ar +RANLIB = $(CROSS_COMPILE)ranlib NM = $(CROSS_COMPILE)nm STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy @@ -43,3 +44,7 @@ # ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY # ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY + +# Optional components +XENSTAT_XENTOP ?= y + diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Tue Aug 23 19:03:21 2005 @@ -807,7 +807,107 @@ # CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y -# CONFIG_USB is not set +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +# CONFIG_USB_DEVICEFS is not set +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_OTG is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_EHCI_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_BLUETOOTH_TTY is not set 
+# CONFIG_USB_ACM is not set +# CONFIG_USB_PRINTER is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# +# CONFIG_USB_STORAGE is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT=y +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set +# CONFIG_USB_AIPTEK is not set +# CONFIG_USB_WACOM is not set +# CONFIG_USB_KBTAB is not set +# CONFIG_USB_POWERMATE is not set +# CONFIG_USB_MTOUCH is not set +# CONFIG_USB_EGALAX is not set +# CONFIG_USB_XPAD is not set +# CONFIG_USB_ATI_REMOTE is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set + +# +# USB Multimedia devices +# +# CONFIG_USB_DABUSB is not set + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET is not set +CONFIG_USB_MON=y + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGETKIT is not set +# CONFIG_USB_PHIDGETSERVO is not set +# CONFIG_USB_IDMOUSE is not set + +# +# USB ATM/DSL drivers +# # # USB Gadget Support diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c Tue Aug 23 19:03:21 2005 @@ -784,7 +784,9 @@ void __init find_smp_config (void) { +#ifndef CONFIG_XEN unsigned int address; +#endif /* * FIXME: Linux 
assumes you have 640K of base ram.. diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Tue Aug 23 19:03:21 2005 @@ -149,12 +149,12 @@ if (cpu_is_offline(cpu)) { local_irq_disable(); +#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU) /* Ack it. From this point on until we get woken up, we're not allowed to take any locks. In particular, don't printk. */ __get_cpu_var(cpu_state) = CPU_DEAD; -#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU) /* Tell hypervisor to take vcpu down. */ HYPERVISOR_vcpu_down(cpu); #endif diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Tue Aug 23 19:03:21 2005 @@ -1575,19 +1575,20 @@ /* Make sure we have a correctly sized P->M table. */ if (max_pfn != xen_start_info.nr_pages) { phys_to_machine_mapping = alloc_bootmem_low_pages( - max_pfn * sizeof(unsigned long)); + max_pfn * sizeof(unsigned int)); if (max_pfn > xen_start_info.nr_pages) { /* set to INVALID_P2M_ENTRY */ memset(phys_to_machine_mapping, ~0, - max_pfn * sizeof(unsigned long)); + max_pfn * sizeof(unsigned int)); memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - xen_start_info.nr_pages * sizeof(unsigned long)); + (unsigned int *)xen_start_info.mfn_list, + xen_start_info.nr_pages * sizeof(unsigned int)); } else { memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - max_pfn * sizeof(unsigned long)); + (unsigned int *)xen_start_info.mfn_list, + max_pfn * sizeof(unsigned int)); + /* N.B. below relies on sizeof(int) == sizeof(long). 
*/ if (HYPERVISOR_dom_mem_op( MEMOP_decrease_reservation, (unsigned long *)xen_start_info.mfn_list + max_pfn, @@ -1597,11 +1598,11 @@ free_bootmem( __pa(xen_start_info.mfn_list), PFN_PHYS(PFN_UP(xen_start_info.nr_pages * - sizeof(unsigned long)))); + sizeof(unsigned int)))); } pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); - for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) + for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned int)), j++ ) { pfn_to_mfn_frame_list[j] = virt_to_mfn(&phys_to_machine_mapping[i]); diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Tue Aug 23 19:03:21 2005 @@ -1604,6 +1604,9 @@ unbind_ipi_from_irq(CALL_FUNCTION_VECTOR); } +extern void local_setup_timer_irq(void); +extern void local_teardown_timer_irq(void); + void smp_suspend(void) { /* XXX todo: take down time and ipi's on all cpus */ diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Tue Aug 23 19:03:21 2005 @@ -281,7 +281,7 @@ siginfo_t info; /* Set the "privileged fault" bit to something sane. 
*/ - error_code &= 3; + error_code &= ~4; error_code |= (regs->xcs & 2) << 1; if (regs->eflags & X86_EFLAGS_VM) error_code |= 4; diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Tue Aug 23 19:03:21 2005 @@ -348,9 +348,12 @@ { unsigned long vaddr; pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base; + int i; swapper_pg_dir = pgd_base; init_mm.pgd = pgd_base; + for (i = 0; i < NR_CPUS; i++) + per_cpu(cur_pgd, i) = pgd_base; /* Enable PSE if available */ if (cpu_has_pse) { diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 23 19:03:21 2005 @@ -36,6 +36,8 @@ { } +#ifdef __i386__ + void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) { return NULL; @@ -44,6 +46,8 @@ void __init bt_iounmap(void *addr, unsigned long size) { } + +#endif /* __i386__ */ #else @@ -58,7 +62,7 @@ extern unsigned long max_low_pfn; unsigned long mfn = address >> PAGE_SHIFT; unsigned long pfn = mfn_to_pfn(mfn); - return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn)); + return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn)); } /* @@ -126,10 +130,12 @@ return NULL; area->phys_addr = phys_addr; addr = (void __iomem *) area->addr; + flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; +#ifdef __x86_64__ + flags |= _PAGE_USER; +#endif if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr, - size, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_DIRTY | _PAGE_ACCESSED - | flags), domid)) { + size, __pgprot(flags), domid)) { vunmap((void __force *) addr); return NULL; } @@ -218,6 +224,8 @@ kfree(p); } +#ifdef __i386__ + void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) { unsigned long offset, 
last_addr; @@ -288,6 +296,8 @@ --nrpages; } } + +#endif /* __i386__ */ #endif /* CONFIG_XEN_PHYSDEV_ACCESS */ @@ -346,7 +356,7 @@ * Fill in the machine address: PTE ptr is done later by * __direct_remap_area_pages(). */ - v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot); + v->val = pte_val_ma(pfn_pte_ma(machine_addr >> PAGE_SHIFT, prot)); machine_addr += PAGE_SIZE; address += PAGE_SIZE; @@ -368,35 +378,37 @@ EXPORT_SYMBOL(direct_remap_area_pages); +static int lookup_pte_fn( + pte_t *pte, struct page *pte_page, unsigned long addr, void *data) +{ + unsigned long *ptep = (unsigned long *)data; + if (ptep) + *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << + PAGE_SHIFT) | + ((unsigned long)pte & ~PAGE_MASK); + return 0; +} + int create_lookup_pte_addr(struct mm_struct *mm, unsigned long address, unsigned long *ptep) { - int f(pte_t *pte, struct page *pte_page, unsigned long addr, - void *data) { - unsigned long *ptep = (unsigned long *)data; - if (ptep) - *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << - PAGE_SHIFT) | - ((unsigned long)pte & ~PAGE_MASK); - return 0; - } - - return generic_page_range(mm, address, PAGE_SIZE, f, ptep); + return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep); } EXPORT_SYMBOL(create_lookup_pte_addr); + +static int noop_fn( + pte_t *pte, struct page *pte_page, unsigned long addr, void *data) +{ + return 0; +} int touch_pte_range(struct mm_struct *mm, unsigned long address, unsigned long size) { - int f(pte_t *pte, struct page *pte_page, unsigned long addr, - void *data) { - return 0; - } - - return generic_page_range(mm, address, size, f, NULL); + return generic_page_range(mm, address, size, noop_fn, NULL); } EXPORT_SYMBOL(touch_pte_range); diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Tue Aug 23 19:03:21 2005 @@ -435,9 +435,11 @@ 
unbind_evtchn_from_irq(evtchn); } +#ifdef CONFIG_SMP static void do_nothing_function(void *ign) { } +#endif /* Rebind an evtchn so that it gets delivered to a specific cpu */ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Tue Aug 23 19:03:21 2005 @@ -40,38 +40,82 @@ EXPORT_SYMBOL(gnttab_end_foreign_transfer); EXPORT_SYMBOL(gnttab_alloc_grant_references); EXPORT_SYMBOL(gnttab_free_grant_references); +EXPORT_SYMBOL(gnttab_free_grant_reference); EXPORT_SYMBOL(gnttab_claim_grant_reference); EXPORT_SYMBOL(gnttab_release_grant_reference); EXPORT_SYMBOL(gnttab_grant_foreign_access_ref); EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref); -static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES]; +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) +#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1) + +static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; +static int gnttab_free_count = NR_GRANT_ENTRIES; static grant_ref_t gnttab_free_head; +static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED; static grant_entry_t *shared; -/* - * Lock-free grant-entry allocator - */ - -static inline int -get_free_entry( - void) -{ - grant_ref_t fh, nfh = gnttab_free_head; - do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; } - while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, - gnttab_free_list[fh])) != fh) ); - return fh; +static struct gnttab_free_callback *gnttab_free_callback_list = NULL; + +static int +get_free_entries(int count) +{ + unsigned long flags; + int ref; + grant_ref_t head; + spin_lock_irqsave(&gnttab_list_lock, flags); + if (gnttab_free_count < count) { + spin_unlock_irqrestore(&gnttab_list_lock, flags); + return -1; + } + ref = head = gnttab_free_head; + gnttab_free_count -= count; + while (count-- > 1) + head = 
gnttab_list[head]; + gnttab_free_head = gnttab_list[head]; + gnttab_list[head] = GNTTAB_LIST_END; + spin_unlock_irqrestore(&gnttab_list_lock, flags); + return ref; +} + +#define get_free_entry() get_free_entries(1) + +static void +do_free_callbacks(void) +{ + struct gnttab_free_callback *callback = gnttab_free_callback_list, *next; + gnttab_free_callback_list = NULL; + while (callback) { + next = callback->next; + if (gnttab_free_count >= callback->count) { + callback->next = NULL; + callback->fn(callback->arg); + } else { + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + } + callback = next; + } } static inline void -put_free_entry( - grant_ref_t ref) -{ - grant_ref_t fh, nfh = gnttab_free_head; - do { gnttab_free_list[ref] = fh = nfh; wmb(); } - while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, ref)) != fh) ); +check_free_callbacks(void) +{ + if (unlikely(gnttab_free_callback_list)) + do_free_callbacks(); +} + +static void +put_free_entry(grant_ref_t ref) +{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + gnttab_list[ref] = gnttab_free_head; + gnttab_free_head = ref; + gnttab_free_count++; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); } /* @@ -79,8 +123,7 @@ */ int -gnttab_grant_foreign_access( - domid_t domid, unsigned long frame, int readonly) +gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly) { int ref; @@ -96,8 +139,8 @@ } void -gnttab_grant_foreign_access_ref( - grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) +gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly) { shared[ref].frame = frame; shared[ref].domid = domid; @@ -107,7 +150,7 @@ int -gnttab_query_foreign_access( grant_ref_t ref ) +gnttab_query_foreign_access(grant_ref_t ref) { u16 nflags; @@ -117,7 +160,7 @@ } void -gnttab_end_foreign_access( grant_ref_t ref, int readonly ) 
+gnttab_end_foreign_access(grant_ref_t ref, int readonly) { u16 flags, nflags; @@ -132,8 +175,7 @@ } int -gnttab_grant_foreign_transfer( - domid_t domid, unsigned long pfn ) +gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) { int ref; @@ -149,8 +191,8 @@ } void -gnttab_grant_foreign_transfer_ref( - grant_ref_t ref, domid_t domid, unsigned long pfn ) +gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, + unsigned long pfn) { shared[ref].frame = pfn; shared[ref].domid = domid; @@ -159,8 +201,7 @@ } unsigned long -gnttab_end_foreign_transfer( - grant_ref_t ref) +gnttab_end_foreign_transfer(grant_ref_t ref) { unsigned long frame = 0; u16 flags; @@ -189,59 +230,79 @@ } void -gnttab_free_grant_references( u16 count, grant_ref_t head ) -{ - /* TODO: O(N)...? */ - grant_ref_t to_die = 0, next = head; - int i; - - for ( i = 0; i < count; i++ ) - { - to_die = next; - next = gnttab_free_list[next]; - put_free_entry( to_die ); +gnttab_free_grant_reference(grant_ref_t ref) +{ + + put_free_entry(ref); +} + +void +gnttab_free_grant_references(grant_ref_t head) +{ + grant_ref_t ref; + unsigned long flags; + int count = 1; + if (head == GNTTAB_LIST_END) + return; + spin_lock_irqsave(&gnttab_list_lock, flags); + ref = head; + while (gnttab_list[ref] != GNTTAB_LIST_END) { + ref = gnttab_list[ref]; + count++; } -} - -int -gnttab_alloc_grant_references( u16 count, - grant_ref_t *head, - grant_ref_t *terminal ) -{ - int i; - grant_ref_t h = gnttab_free_head; - - for ( i = 0; i < count; i++ ) - if ( unlikely(get_free_entry() == -1) ) - goto not_enough_refs; + gnttab_list[ref] = gnttab_free_head; + gnttab_free_head = head; + gnttab_free_count += count; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} + +int +gnttab_alloc_grant_references(u16 count, grant_ref_t *head) +{ + int h = get_free_entries(count); + + if (h == -1) + return -ENOSPC; *head = h; - *terminal = gnttab_free_head; return 0; - -not_enough_refs: - gnttab_free_head 
= h; - return -ENOSPC; -} - -int -gnttab_claim_grant_reference( grant_ref_t *private_head, - grant_ref_t terminal ) -{ - grant_ref_t g; - if ( unlikely((g = *private_head) == terminal) ) +} + +int +gnttab_claim_grant_reference(grant_ref_t *private_head) +{ + grant_ref_t g = *private_head; + if (unlikely(g == GNTTAB_LIST_END)) return -ENOSPC; - *private_head = gnttab_free_list[g]; + *private_head = gnttab_list[g]; return g; } void -gnttab_release_grant_reference( grant_ref_t *private_head, - grant_ref_t release ) -{ - gnttab_free_list[release] = *private_head; +gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) +{ + gnttab_list[release] = *private_head; *private_head = release; +} + +void +gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, u16 count) +{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + if (callback->next) + goto out; + callback->fn = fn; + callback->arg = arg; + callback->count = count; + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + check_free_callbacks(); + out: + spin_unlock_irqrestore(&gnttab_list_lock, flags); } /* @@ -252,8 +313,9 @@ static struct proc_dir_entry *grant_pde; -static int grant_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long data) +static int +grant_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long data) { int ret; privcmd_hypercall_t hypercall; @@ -291,8 +353,9 @@ ioctl: grant_ioctl, }; -static int grant_read(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int +grant_read(char *page, char **start, off_t off, int count, int *eof, + void *data) { int len; unsigned int i; @@ -321,8 +384,9 @@ return len; } -static int grant_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static int +grant_write(struct file *file, const char __user *buffer, unsigned long count, + 
void *data) { /* TODO: implement this */ return -ENOSYS; @@ -330,7 +394,8 @@ #endif /* CONFIG_PROC_FS */ -int gnttab_resume(void) +int +gnttab_resume(void) { gnttab_setup_table_t setup; unsigned long frames[NR_GRANT_FRAMES]; @@ -349,7 +414,8 @@ return 0; } -int gnttab_suspend(void) +int +gnttab_suspend(void) { int i; @@ -359,7 +425,8 @@ return 0; } -static int __init gnttab_init(void) +static int __init +gnttab_init(void) { int i; @@ -368,7 +435,7 @@ shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END); for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) - gnttab_free_list[i] = i + 1; + gnttab_list[i] = i + 1; #ifdef CONFIG_PROC_FS /* diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Tue Aug 23 19:03:21 2005 @@ -44,7 +44,7 @@ c-obj-$(CONFIG_MODULES) += module.o -#obj-y += topology.o +obj-y += topology.o c-obj-y += intel_cacheinfo.o bootflag-y += ../../../i386/kernel/bootflag.o diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Tue Aug 23 19:03:21 2005 @@ -778,21 +778,21 @@ /* Make sure we have a large enough P->M table. 
*/ if (end_pfn > xen_start_info.nr_pages) { phys_to_machine_mapping = alloc_bootmem( - max_pfn * sizeof(unsigned long)); + max_pfn * sizeof(u32)); memset(phys_to_machine_mapping, ~0, - max_pfn * sizeof(unsigned long)); + max_pfn * sizeof(u32)); memcpy(phys_to_machine_mapping, - (unsigned long *)xen_start_info.mfn_list, - xen_start_info.nr_pages * sizeof(unsigned long)); + (u32 *)xen_start_info.mfn_list, + xen_start_info.nr_pages * sizeof(u32)); free_bootmem( __pa(xen_start_info.mfn_list), PFN_PHYS(PFN_UP(xen_start_info.nr_pages * - sizeof(unsigned long)))); + sizeof(u32)))); } pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE); - for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) + for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(u32)), j++ ) { pfn_to_mfn_frame_list[j] = virt_to_mfn(&phys_to_machine_mapping[i]); diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile Tue Aug 23 19:03:21 2005 @@ -6,10 +6,10 @@ CFLAGS += -Iarch/$(XENARCH)/mm -obj-y := init.o fault.o ioremap.o pageattr.o +obj-y := init.o fault.o pageattr.o c-obj-y := extable.o -i386-obj-y := hypervisor.o +i386-obj-y := hypervisor.o ioremap.o #obj-y := init.o fault.o ioremap.o extable.o pageattr.o #c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 23 19:03:21 2005 @@ -559,6 +559,11 @@ void __init xen_init_pt(void) { + int i; + + for (i = 0; i < NR_CPUS; i++) + per_cpu(cur_pgd, i) = init_mm.pgd; + memcpy((void *)init_level4_pgt, (void *)xen_start_info.pt_base, PAGE_SIZE); diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- 
a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Tue Aug 23 19:03:21 2005 @@ -434,20 +434,20 @@ balloon_unlock(flags); } +static int dealloc_pte_fn( + pte_t *pte, struct page *pte_page, unsigned long addr, void *data) +{ + unsigned long mfn = pte_mfn(*pte); + set_pte(pte, __pte_ma(0)); + phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] = + INVALID_P2M_ENTRY; + BUG_ON(HYPERVISOR_dom_mem_op( + MEMOP_decrease_reservation, &mfn, 1, 0) != 1); + return 0; +} + struct page *balloon_alloc_empty_page_range(unsigned long nr_pages) { - int f(pte_t *pte, struct page *pte_page, - unsigned long addr, void *data) - { - unsigned long mfn = pte_mfn(*pte); - set_pte(pte, __pte_ma(0)); - phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] = - INVALID_P2M_ENTRY; - BUG_ON(HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, &mfn, 1, 0) != 1); - return 0; - } - unsigned long vstart, flags; unsigned int order = get_order(nr_pages * PAGE_SIZE); @@ -459,7 +459,7 @@ balloon_lock(flags); BUG_ON(generic_page_range( - &init_mm, vstart, PAGE_SIZE << order, f, NULL) != 0); + &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL)); current_pages -= 1UL << order; balloon_unlock(flags); diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Tue Aug 23 19:03:21 2005 @@ -11,6 +11,8 @@ * Copyright (c) 2005, Christopher Clark */ +#include <linux/spinlock.h> +#include <asm-xen/balloon.h> #include "common.h" /* @@ -63,9 +65,6 @@ static PEND_RING_IDX pending_prod, pending_cons; #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -static kmem_cache_t *buffer_head_cachep; -#else static request_queue_t *plugged_queue; static inline void flush_plugged_queue(void) { @@ -78,7 +77,6 @@ 
plugged_queue = NULL; } } -#endif /* When using grant tables to map a frame for device access then the * handle returned must be used to unmap the frame. This is needed to @@ -182,11 +180,7 @@ blkif_t *blkif; struct list_head *ent; - daemonize( -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - "xenblkd" -#endif - ); + daemonize("xenblkd"); for ( ; ; ) { @@ -213,11 +207,7 @@ } /* Push the batch through to disc. */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - run_task_queue(&tq_disk); -#else flush_plugged_queue(); -#endif } } @@ -266,13 +256,6 @@ } } -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -static void end_block_io_op(struct buffer_head *bh, int uptodate) -{ - __end_block_io_op(bh->b_private, uptodate); - kmem_cache_free(buffer_head_cachep, bh); -} -#else static int end_block_io_op(struct bio *bio, unsigned int done, int error) { if ( bio->bi_size != 0 ) @@ -281,7 +264,6 @@ bio_put(bio); return error; } -#endif /****************************************************************************** @@ -355,13 +337,9 @@ unsigned long buf; unsigned int nsec; } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int nseg; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - struct buffer_head *bh; -#else struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int nbio = 0; request_queue_t *q; -#endif /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; @@ -432,49 +410,6 @@ pending_req->operation = operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - - atomic_set(&pending_req->pendcnt, nseg); - pending_cons++; - blkif_get(blkif); - - for ( i = 0; i < nseg; i++ ) - { - bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL); - if ( unlikely(bh == NULL) ) - { - __end_block_io_op(pending_req, 0); - continue; - } - - memset(bh, 0, sizeof (struct buffer_head)); - - init_waitqueue_head(&bh->b_wait); - bh->b_size = seg[i].nsec << 9; - bh->b_dev = preq.dev; - bh->b_rdev = preq.dev; - bh->b_rsector = (unsigned long)preq.sector_number; - bh->b_data = (char *)MMAP_VADDR(pending_idx, i) + - (seg[i].buf & ~PAGE_MASK); - bh->b_page = virt_to_page(MMAP_VADDR(pending_idx, i)); - bh->b_end_io = end_block_io_op; - bh->b_private = pending_req; - - bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | - (1 << BH_Req) | (1 << BH_Launder); - if ( operation == WRITE ) - bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate); - - atomic_set(&bh->b_count, 1); - - /* Dispatch a single request. We'll flush it to disc later. 
*/ - generic_make_request(operation, bh); - - preq.sector_number += seg[i].nsec; - } - -#else for ( i = 0; i < nseg; i++ ) { @@ -524,8 +459,6 @@ for ( i = 0; i < nbio; i++ ) submit_bio(operation, biolist[i]); -#endif - return; bad_descriptor: @@ -593,12 +526,6 @@ if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) BUG(); -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - buffer_head_cachep = kmem_cache_create( - "buffer_head_cache", sizeof(struct buffer_head), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); -#endif - blkif_xenbus_init(); memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES ); diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Tue Aug 23 19:03:21 2005 @@ -5,7 +5,6 @@ #include <linux/config.h> #include <linux/version.h> #include <linux/module.h> -#include <linux/rbtree.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/blkdev.h> @@ -30,12 +29,13 @@ #define DPRINTK(_f, _a...) ((void)0) #endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -typedef struct rb_root rb_root_t; -typedef struct rb_node rb_node_t; -#else -struct block_device; -#endif +struct vbd { + blkif_vdev_t handle; /* what the domain refers to this vbd as */ + unsigned char readonly; /* Non-zero -> read-only */ + unsigned char type; /* VDISK_xxx */ + blkif_pdev_t pdevice; /* phys device that this vbd maps to */ + struct block_device *bdev; +}; typedef struct blkif_st { /* Unique identifier for this interface. */ @@ -48,25 +48,18 @@ /* Comms information. */ blkif_back_ring_t blk_ring; /* VBDs attached to this interface. */ - rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs.*/ - spinlock_t vbd_lock; /* Protects VBD mapping. */ + struct vbd vbd; /* Private fields. 
*/ enum { DISCONNECTED, CONNECTED } status; - /* - * DISCONNECT response is deferred until pending requests are ack'ed. - * We therefore need to store the id from the original request. - */ - u8 disconnect_rspid; #ifdef CONFIG_XEN_BLKDEV_TAP_BE /* Is this a blktap frontend */ unsigned int is_blktap; #endif - struct blkif_st *hash_next; struct list_head blkdev_list; spinlock_t blk_ring_lock; atomic_t refcnt; - struct work_struct work; + struct work_struct free_work; u16 shmem_handle; unsigned long shmem_vaddr; grant_ref_t shmem_ref; @@ -77,30 +70,25 @@ void blkif_connect(blkif_be_connect_t *connect); int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id); void blkif_disconnect_complete(blkif_t *blkif); -blkif_t *blkif_find(domid_t domid); -void free_blkif(blkif_t *blkif); +blkif_t *alloc_blkif(domid_t domid); +void free_blkif_callback(blkif_t *blkif); int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ do { \ if ( atomic_dec_and_test(&(_b)->refcnt) ) \ - free_blkif(_b); \ + free_blkif_callback(_b); \ } while (0) -struct vbd; -void vbd_free(blkif_t *blkif, struct vbd *vbd); - -/* Creates inactive vbd. */ -struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, int readonly); -int vbd_is_active(struct vbd *vbd); -void vbd_activate(blkif_t *blkif, struct vbd *vbd); +/* Create a vbd. 
*/ +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, + int readonly); +void vbd_free(struct vbd *vbd); unsigned long vbd_size(struct vbd *vbd); unsigned int vbd_info(struct vbd *vbd); unsigned long vbd_secsize(struct vbd *vbd); -void vbd_destroy(blkif_be_vbd_destroy_t *delete); -void destroy_all_vbds(blkif_t *blkif); struct phys_req { unsigned short dev; diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Tue Aug 23 19:03:21 2005 @@ -9,27 +9,11 @@ #include "common.h" #include <asm-xen/evtchn.h> -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define VMALLOC_VMADDR(x) ((unsigned long)(x)) -#endif +static kmem_cache_t *blkif_cachep; -#define BLKIF_HASHSZ 1024 -#define BLKIF_HASH(_d) (((int)(_d))&(BLKIF_HASHSZ-1)) - -static kmem_cache_t *blkif_cachep; -static blkif_t *blkif_hash[BLKIF_HASHSZ]; - -blkif_t *blkif_find(domid_t domid) +blkif_t *alloc_blkif(domid_t domid) { - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid)]; - - while (blkif) { - if (blkif->domid == domid) { - blkif_get(blkif); - return blkif; - } - blkif = blkif->hash_next; - } + blkif_t *blkif; blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); if (!blkif) @@ -38,12 +22,9 @@ memset(blkif, 0, sizeof(*blkif)); blkif->domid = domid; blkif->status = DISCONNECTED; - spin_lock_init(&blkif->vbd_lock); spin_lock_init(&blkif->blk_ring_lock); atomic_set(&blkif->refcnt, 1); - blkif->hash_next = blkif_hash[BLKIF_HASH(domid)]; - blkif_hash[BLKIF_HASH(domid)] = blkif; return blkif; } @@ -55,7 +36,7 @@ op.flags = GNTMAP_host_map; op.ref = shared_page; op.dom = blkif->domid; - + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); if (op.handle < 0) { @@ -91,7 +72,7 @@ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) return -ENOMEM; - err = map_frontend_page(blkif, VMALLOC_VMADDR(vma->addr), 
shared_page); + err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page); if (err) { vfree(vma->addr); return err; @@ -123,10 +104,10 @@ return 0; } -void free_blkif(blkif_t *blkif) +static void free_blkif(void *arg) { - blkif_t **pblkif; evtchn_op_t op = { .cmd = EVTCHNOP_close }; + blkif_t *blkif = (blkif_t *)arg; op.u.close.port = blkif->evtchn; op.u.close.dom = DOMID_SELF; @@ -134,6 +115,8 @@ op.u.close.port = blkif->remote_evtchn; op.u.close.dom = blkif->domid; HYPERVISOR_event_channel_op(&op); + + vbd_free(&blkif->vbd); if (blkif->evtchn) unbind_evtchn_from_irqhandler(blkif->evtchn, blkif); @@ -143,20 +126,17 @@ vfree(blkif->blk_ring.sring); } - pblkif = &blkif_hash[BLKIF_HASH(blkif->domid)]; - while ( *pblkif != blkif ) - { - BUG_ON(!*pblkif); - pblkif = &(*pblkif)->hash_next; - } - *pblkif = blkif->hash_next; - destroy_all_vbds(blkif); kmem_cache_free(blkif_cachep, blkif); +} + +void free_blkif_callback(blkif_t *blkif) +{ + INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif); + schedule_work(&blkif->free_work); } void __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 0, 0, NULL, NULL); - memset(blkif_hash, 0, sizeof(blkif_hash)); } diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Tue Aug 23 19:03:21 2005 @@ -2,10 +2,6 @@ * blkback/vbd.c * * Routines for managing virtual block devices (VBDs). - * - * NOTE: vbd_lock protects updates to the rb_tree against concurrent lookups - * in vbd_translate. All other lookups are implicitly protected because the - * only caller (the control message dispatch routine) serializes the calls. 
* * Copyright (c) 2003-2005, Keir Fraser & Steve Hand */ @@ -13,28 +9,13 @@ #include "common.h" #include <asm-xen/xenbus.h> -struct vbd { - blkif_vdev_t handle; /* what the domain refers to this vbd as */ - unsigned char readonly; /* Non-zero -> read-only */ - unsigned char type; /* VDISK_xxx */ - blkif_pdev_t pdevice; /* phys device that this vbd maps to */ - struct block_device *bdev; - - int active; - rb_node_t rb; /* for linking into R-B tree lookup struct */ -}; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) static inline dev_t vbd_map_devnum(blkif_pdev_t cookie) -{ return MKDEV(cookie>>8, cookie&0xff); } +{ + return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie)); +} #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) #define bdev_put(_b) blkdev_put(_b) -#else -#define vbd_sz(_v) (blk_size[MAJOR((_v)->pdevice)][MINOR((_v)->pdevice)]*2) -#define bdev_put(_b) ((void)0) -#define bdev_hardsect_size(_b) 512 -#endif unsigned long vbd_size(struct vbd *vbd) { @@ -51,45 +32,32 @@ return bdev_hardsect_size(vbd->bdev); } -int vbd_is_active(struct vbd *vbd) +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, + blkif_pdev_t pdevice, int readonly) { - return vbd->active; -} + struct vbd *vbd; -struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t handle, - blkif_pdev_t pdevice, int readonly) -{ - struct vbd *vbd; - - if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) ) - { - DPRINTK("vbd_create: out of memory\n"); - return ERR_PTR(-ENOMEM); - } - + vbd = &blkif->vbd; vbd->handle = handle; vbd->readonly = readonly; vbd->type = 0; - vbd->active = 0; vbd->pdevice = pdevice; - /* FIXME: Who frees vbd on failure? --RR */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) vbd->bdev = open_by_devnum( vbd_map_devnum(vbd->pdevice), vbd->readonly ? 
FMODE_READ : FMODE_WRITE); if ( IS_ERR(vbd->bdev) ) { DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); - return ERR_PTR(-ENOENT); + return -ENOENT; } if ( (vbd->bdev->bd_disk == NULL) ) { DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); - bdev_put(vbd->bdev); - return ERR_PTR(-ENOENT); + vbd_free(vbd); + return -ENOENT; } if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD ) @@ -97,121 +65,27 @@ if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE ) vbd->type |= VDISK_REMOVABLE; -#else - if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) ) - { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice); - return ERR_PTR(-ENOENT); - } -#endif - DPRINTK("Successful creation of handle=%04x (dom=%u)\n", handle, blkif->domid); - return vbd; + return 0; } -void vbd_activate(blkif_t *blkif, struct vbd *vbd) +void vbd_free(struct vbd *vbd) { - rb_node_t **rb_p, *rb_parent = NULL; - struct vbd *i; - BUG_ON(vbd_is_active(vbd)); - - /* Find where to put it. */ - rb_p = &blkif->vbd_rb.rb_node; - while ( *rb_p != NULL ) - { - rb_parent = *rb_p; - i = rb_entry(rb_parent, struct vbd, rb); - if ( vbd->handle < i->handle ) - { - rb_p = &rb_parent->rb_left; - } - else if ( vbd->handle > i->handle ) - { - rb_p = &rb_parent->rb_right; - } - else - { - /* We never create two of same vbd, so not possible. */ - BUG(); - } - } - - /* Now we're active. 
*/ - vbd->active = 1; - blkif_get(blkif); - - spin_lock(&blkif->vbd_lock); - rb_link_node(&vbd->rb, rb_parent, rb_p); - rb_insert_color(&vbd->rb, &blkif->vbd_rb); - spin_unlock(&blkif->vbd_lock); -} - -void vbd_free(blkif_t *blkif, struct vbd *vbd) -{ - if (vbd_is_active(vbd)) { - spin_lock(&blkif->vbd_lock); - rb_erase(&vbd->rb, &blkif->vbd_rb); - spin_unlock(&blkif->vbd_lock); - blkif_put(blkif); - } - bdev_put(vbd->bdev); - kfree(vbd); -} - -void destroy_all_vbds(blkif_t *blkif) -{ - struct vbd *vbd; - rb_node_t *rb; - - spin_lock(&blkif->vbd_lock); - - while ( (rb = blkif->vbd_rb.rb_node) != NULL ) - { - vbd = rb_entry(rb, struct vbd, rb); - rb_erase(rb, &blkif->vbd_rb); - spin_unlock(&blkif->vbd_lock); - bdev_put(vbd->bdev); - kfree(vbd); - spin_lock(&blkif->vbd_lock); - blkif_put(blkif); - } - - spin_unlock(&blkif->vbd_lock); + if (vbd->bdev) + bdev_put(vbd->bdev); + vbd->bdev = NULL; } int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) { - struct vbd *vbd; - rb_node_t *rb; - int rc = -EACCES; + struct vbd *vbd = &blkif->vbd; + int rc = -EACCES; - /* Take the vbd_lock because another thread could be updating the tree. 
*/ - spin_lock(&blkif->vbd_lock); - - rb = blkif->vbd_rb.rb_node; - while ( rb != NULL ) - { - vbd = rb_entry(rb, struct vbd, rb); - if ( req->dev < vbd->handle ) - rb = rb->rb_left; - else if ( req->dev > vbd->handle ) - rb = rb->rb_right; - else - goto found; - } - - DPRINTK("vbd_translate; domain %u attempted to access " - "non-existent VBD.\n", blkif->domid); - rc = -ENODEV; - goto out; - - found: - - if ( (operation == WRITE) && vbd->readonly ) + if ((operation == WRITE) && vbd->readonly) goto out; - if ( unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)) ) + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) goto out; req->dev = vbd->pdevice; @@ -219,6 +93,5 @@ rc = 0; out: - spin_unlock(&blkif->vbd_lock); return rc; } diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Tue Aug 23 19:03:21 2005 @@ -26,7 +26,6 @@ /* our communications channel */ blkif_t *blkif; - struct vbd *vbd; long int frontend_id; long int pdev; @@ -47,8 +46,6 @@ if (be->watch.node) unregister_xenbus_watch(&be->watch); unregister_xenbus_watch(&be->backend_watch); - if (be->vbd) - vbd_free(be->blkif, be->vbd); if (be->blkif) blkif_put(be->blkif); if (be->frontpath) @@ -72,7 +69,7 @@ device_unregister(&be->dev->dev); return; } - if (vbd_is_active(be->vbd)) + if (be->blkif->status == CONNECTED) return; err = xenbus_gather(be->frontpath, "grant-id", "%lu", &sharedmfn, @@ -85,9 +82,8 @@ } /* Domains must use same shared frame for all vbds. 
*/ - if (be->blkif->status == CONNECTED && - (evtchn != be->blkif->remote_evtchn || - sharedmfn != be->blkif->shmem_frame)) { + if (evtchn != be->blkif->remote_evtchn || + sharedmfn != be->blkif->shmem_frame) { xenbus_dev_error(be->dev, err, "Shared frame/evtchn %li/%u not same as" " old %li/%u", @@ -105,7 +101,7 @@ } err = xenbus_printf(be->dev->nodename, "sectors", "%lu", - vbd_size(be->vbd)); + vbd_size(&be->blkif->vbd)); if (err) { xenbus_dev_error(be->dev, err, "writing %s/sectors", be->dev->nodename); @@ -114,33 +110,28 @@ /* FIXME: use a typename instead */ err = xenbus_printf(be->dev->nodename, "info", "%u", - vbd_info(be->vbd)); + vbd_info(&be->blkif->vbd)); if (err) { xenbus_dev_error(be->dev, err, "writing %s/info", be->dev->nodename); goto abort; } err = xenbus_printf(be->dev->nodename, "sector-size", "%lu", - vbd_secsize(be->vbd)); + vbd_secsize(&be->blkif->vbd)); if (err) { xenbus_dev_error(be->dev, err, "writing %s/sector-size", be->dev->nodename); goto abort; } - /* First vbd? We need to map the shared frame, irq etc. */ - if (be->blkif->status != CONNECTED) { - err = blkif_map(be->blkif, sharedmfn, evtchn); - if (err) { - xenbus_dev_error(be->dev, err, - "mapping shared-frame %lu port %u", - sharedmfn, evtchn); - goto abort; - } - } - - /* We're ready, activate. */ - vbd_activate(be->blkif, be->vbd); + /* Map the shared frame, irq etc. 
*/ + err = blkif_map(be->blkif, sharedmfn, evtchn); + if (err) { + xenbus_dev_error(be->dev, err, + "mapping shared-frame %lu port %u", + sharedmfn, evtchn); + goto abort; + } xenbus_transaction_end(0); xenbus_dev_ok(be->dev); @@ -228,20 +219,16 @@ p = strrchr(be->frontpath, '/') + 1; handle = simple_strtoul(p, NULL, 0); - be->blkif = blkif_find(be->frontend_id); + be->blkif = alloc_blkif(be->frontend_id); if (IS_ERR(be->blkif)) { err = PTR_ERR(be->blkif); be->blkif = NULL; goto device_fail; } - be->vbd = vbd_create(be->blkif, handle, be->pdev, - be->readonly); - if (IS_ERR(be->vbd)) { - err = PTR_ERR(be->vbd); - be->vbd = NULL; + err = vbd_create(be->blkif, handle, be->pdev, be->readonly); + if (err) goto device_fail; - } frontend_changed(&be->watch, be->frontpath); } diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Aug 23 19:03:21 2005 @@ -63,25 +63,16 @@ /* Control whether runtime update of vbds is enabled. 
*/ #define ENABLE_VBD_UPDATE 1 -#define BLKIF_STATE_CLOSED 0 -#define BLKIF_STATE_DISCONNECTED 1 -#define BLKIF_STATE_CONNECTED 2 - -static unsigned int blkif_state = BLKIF_STATE_CLOSED; -static unsigned int blkif_evtchn = 0; -static unsigned int blkif_vbds = 0; -static unsigned int blkif_vbds_connected = 0; - -static blkif_front_ring_t blk_ring; +#define BLKIF_STATE_DISCONNECTED 0 +#define BLKIF_STATE_CONNECTED 1 + +static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED; #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) -static domid_t rdomid = 0; -static grant_ref_t gref_head, gref_terminal; #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) #define GRANTREF_INVALID (1<<15) -static int shmem_ref; static struct blk_shadow { blkif_request_t req; @@ -92,7 +83,7 @@ static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */ -static void kick_pending_request_queues(void); +static void kick_pending_request_queues(struct blkfront_info *info); static int __init xlblk_init(void); @@ -119,7 +110,7 @@ /* Kernel-specific definitions used in the common code */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define DISABLE_SCATTERGATHER() +#define DISABLE_SCATTERGATHER() #else static int sg_operation = -1; #define DISABLE_SCATTERGATHER() (sg_operation = -1) @@ -138,11 +129,11 @@ } -static inline void flush_requests(void) +static inline void flush_requests(struct blkfront_info *info) { DISABLE_SCATTERGATHER(); - RING_PUSH_REQUESTS(&blk_ring); - notify_via_evtchn(blkif_evtchn); + RING_PUSH_REQUESTS(&info->ring); + notify_via_evtchn(info->evtchn); } @@ -152,30 +143,39 @@ module_init(xlblk_init); -static struct xlbd_disk_info *head_waiting = NULL; -static void kick_pending_request_queues(void) -{ - struct xlbd_disk_info *di; - while ( ((di = head_waiting) != NULL) && !RING_FULL(&blk_ring) ) - { - head_waiting = di->next_waiting; - di->next_waiting = NULL; - /* Re-enable calldowns. 
*/ - blk_start_queue(di->rq); - /* Kick things off immediately. */ - do_blkif_request(di->rq); - } +static void kick_pending_request_queues(struct blkfront_info *info) +{ + if (!RING_FULL(&info->ring)) { + /* Re-enable calldowns. */ + blk_start_queue(info->rq); + /* Kick things off immediately. */ + do_blkif_request(info->rq); + } +} + +static void blkif_restart_queue(void *arg) +{ + struct blkfront_info *info = (struct blkfront_info *)arg; + spin_lock_irq(&blkif_io_lock); + kick_pending_request_queues(info); + spin_unlock_irq(&blkif_io_lock); +} + +static void blkif_restart_queue_callback(void *arg) +{ + struct blkfront_info *info = (struct blkfront_info *)arg; + schedule_work(&info->work); } int blkif_open(struct inode *inode, struct file *filep) { - struct gendisk *gd = inode->i_bdev->bd_disk; - struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; - - /* Update of usage count is protected by per-device semaphore. */ - di->mi->usage++; - - return 0; + // struct gendisk *gd = inode->i_bdev->bd_disk; + // struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; + + /* Update of usage count is protected by per-device semaphore. */ + // di->mi->usage++; + + return 0; } @@ -192,8 +192,8 @@ int i; DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long)argument, inode->i_rdev); - + command, (long)argument, inode->i_rdev); + switch ( command ) { case HDIO_GETGEO: @@ -219,7 +219,7 @@ /* * blkif_queue_request * - * request block io + * request block io * * id: for guest use only. 
* operation: BLKIF_OP_{READ,WRITE,PROBE} @@ -228,7 +228,7 @@ */ static int blkif_queue_request(struct request *req) { - struct xlbd_disk_info *di = req->rq_disk->private_data; + struct blkfront_info *info = req->rq_disk->private_data; unsigned long buffer_ma; blkif_request_t *ring_req; struct bio *bio; @@ -237,20 +237,28 @@ unsigned long id; unsigned int fsect, lsect; int ref; - - if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) + grant_ref_t gref_head; + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; + if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST, + &gref_head) < 0) { + gnttab_request_free_callback(&info->callback, + blkif_restart_queue_callback, info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + /* Fill out a communications ring structure. */ - ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt); + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); id = GET_ID_FROM_FREELIST(); blk_shadow[id].request = (unsigned long)req; ring_req->id = id; - ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : - BLKIF_OP_READ; + ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; ring_req->sector_number = (blkif_sector_t)req->sector; - ring_req->handle = di->handle; + ring_req->handle = info->handle; ring_req->nr_segments = 0; rq_for_each_bio(bio, req) @@ -263,31 +271,34 @@ fsect = bvec->bv_offset >> 9; lsect = fsect + (bvec->bv_len >> 9) - 1; /* install a grant reference. 
*/ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ref = gnttab_claim_grant_reference(&gref_head); ASSERT( ref != -ENOSPC ); gnttab_grant_foreign_access_ref( ref, - rdomid, + info->backend_id, buffer_ma >> PAGE_SHIFT, rq_data_dir(req) ); blk_shadow[id].frame[ring_req->nr_segments] = buffer_ma >> PAGE_SHIFT; - ring_req->frame_and_sects[ring_req->nr_segments++] = + ring_req->frame_and_sects[ring_req->nr_segments] = blkif_fas_from_gref(ref, fsect, lsect); + + ring_req->nr_segments++; } } - blk_ring.req_prod_pvt++; - + info->ring.req_prod_pvt++; + /* Keep a private copy so we can reissue requests when recovering. */ pickle_request(&blk_shadow[id], ring_req); + gnttab_free_grant_references(gref_head); + return 0; } - /* * do_blkif_request @@ -295,24 +306,26 @@ */ void do_blkif_request(request_queue_t *rq) { - struct xlbd_disk_info *di; + struct blkfront_info *info = NULL; struct request *req; int queued; - DPRINTK("Entered do_blkif_request\n"); + DPRINTK("Entered do_blkif_request\n"); queued = 0; while ( (req = elv_next_request(rq)) != NULL ) { + info = req->rq_disk->private_data; + if ( !blk_fs_request(req) ) { end_request(req, 0); continue; } - if ( RING_FULL(&blk_ring) ) - goto wait; + if (RING_FULL(&info->ring)) + goto wait; DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n", req, req->cmd, req->sector, req->current_nr_sectors, @@ -320,25 +333,19 @@ rq_data_dir(req) ? "write" : "read"); blkdev_dequeue_request(req); - if ( blkif_queue_request(req) ) - { + if (blkif_queue_request(req)) { + blk_requeue_request(rq, req); wait: - di = req->rq_disk->private_data; - if ( di->next_waiting == NULL ) - { - di->next_waiting = head_waiting; - head_waiting = di; - /* Avoid pointless unplugs. */ - blk_stop_queue(rq); - } - break; + /* Avoid pointless unplugs. 
*/ + blk_stop_queue(rq); + break; } queued++; } if ( queued != 0 ) - flush_requests(); + flush_requests(info); } @@ -347,25 +354,24 @@ struct request *req; blkif_response_t *bret; RING_IDX i, rp; - unsigned long flags; - - spin_lock_irqsave(&blkif_io_lock, flags); - - if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) || - unlikely(recovery) ) - { + unsigned long flags; + struct blkfront_info *info = (struct blkfront_info *)dev_id; + + spin_lock_irqsave(&blkif_io_lock, flags); + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { spin_unlock_irqrestore(&blkif_io_lock, flags); return IRQ_HANDLED; } - - rp = blk_ring.sring->rsp_prod; + + rp = info->ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ - for ( i = blk_ring.rsp_cons; i != rp; i++ ) + for ( i = info->ring.rsp_cons; i != rp; i++ ) { unsigned long id; - bret = RING_GET_RESPONSE(&blk_ring, i); + bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; req = (struct request *)blk_shadow[id].request; @@ -382,7 +388,7 @@ bret->status); if ( unlikely(end_that_request_first - (req, + (req, (bret->status == BLKIF_RSP_OKAY), req->hard_nr_sectors)) ) BUG(); @@ -394,9 +400,9 @@ } } - blk_ring.rsp_cons = i; - - kick_pending_request_queues(); + info->ring.rsp_cons = i; + + kick_pending_request_queues(info); spin_unlock_irqrestore(&blkif_io_lock, flags); @@ -425,31 +431,31 @@ static void kick_pending_request_queues(void) { /* We kick pending request queues if the ring is reasonably empty. */ - if ( (nr_pending != 0) && - (RING_PENDING_REQUESTS(&blk_ring) < (BLK_RING_SIZE >> 1)) ) + if ( (nr_pending != 0) && + (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) ) { /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ - while ( (nr_pending != 0) && !RING_FULL(&blk_ring) ) + while ( (nr_pending != 0) && !RING_FULL(&info->ring) ) do_blkif_request(pending_queues[--nr_pending]); } } int blkif_open(struct inode *inode, struct file *filep) { - short xldev = inode->i_rdev; + short xldev = inode->i_rdev; struct gendisk *gd = get_gendisk(xldev); xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - short minor = MINOR(xldev); + short minor = MINOR(xldev); if ( gd->part[minor].nr_sects == 0 ) - { + { /* * Device either doesn't exist, or has zero capacity; we use a few * cheesy heuristics to return the relevant error code */ if ( (gd->sizes[minor >> gd->minor_shift] != 0) || ((minor & (gd->max_p - 1)) != 0) ) - { + { /* * We have a real device, but no such partition, or we just have a * partition number so guess this is the problem. @@ -458,16 +464,16 @@ } else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) { - /* This is a removable device => assume that media is missing. */ + /* This is a removable device => assume that media is missing. */ return -ENOMEDIUM; /* media not present (this is a guess) */ - } + } else - { + { /* Just go for the general 'no such device' error. */ return -ENODEV; /* no such device */ } } - + /* Update of usage count is protected by per-device semaphore. */ disk->usage++; @@ -496,24 +502,24 @@ { kdev_t dev = inode->i_rdev; struct hd_geometry *geo = (struct hd_geometry *)argument; - struct gendisk *gd; - struct hd_struct *part; + struct gendisk *gd; + struct hd_struct *part; int i; unsigned short cylinders; byte heads, sectors; /* NB. No need to check permissions. That is done for us. 
*/ - + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long) argument, dev); - + command, (long) argument, dev); + gd = get_gendisk(dev); - part = &gd->part[MINOR(dev)]; + part = &gd->part[MINOR(dev)]; switch ( command ) { case BLKGETSIZE: - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); return put_user(part->nr_sects, (unsigned long *) argument); case BLKGETSIZE64: @@ -526,7 +532,7 @@ return blkif_revalidate(dev); case BLKSSZGET: - return hardsect_size[MAJOR(dev)][MINOR(dev)]; + return hardsect_size[MAJOR(dev)][MINOR(dev)]; case BLKBSZGET: /* get block size */ DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); @@ -552,7 +558,7 @@ values consistent with the size of the device */ heads = 0xff; - sectors = 0x3f; + sectors = 0x3f; cylinders = part->nr_sects / (heads * sectors); if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; @@ -562,7 +568,7 @@ return 0; - case HDIO_GETGEO_BIG: + case HDIO_GETGEO_BIG: DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); if (!argument) return -EINVAL; @@ -570,7 +576,7 @@ values consistent with the size of the device */ heads = 0xff; - sectors = 0x3f; + sectors = 0x3f; cylinders = part->nr_sects / (heads * sectors); if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; @@ -594,7 +600,7 @@ WPRINTK("ioctl %08x not supported by XL blkif\n", command); return -ENOSYS; } - + return 0; } @@ -614,7 +620,7 @@ xl_disk_t *disk; unsigned long capacity; int i, rc = 0; - + if ( (bd = bdget(dev)) == NULL ) return -EINVAL; @@ -662,7 +668,7 @@ /* * blkif_queue_request * - * request block io + * request block io * * id: for guest use only. 
* operation: BLKIF_OP_{READ,WRITE,PROBE} @@ -696,7 +702,7 @@ buffer_ma &= PAGE_MASK; - if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return 1; switch ( operation ) @@ -704,7 +710,7 @@ case BLKIF_OP_READ: case BLKIF_OP_WRITE: - gd = get_gendisk(device); + gd = get_gendisk(device); /* * Update the sector_number we'll pass down as appropriate; note that @@ -714,10 +720,10 @@ sector_number += gd->part[MINOR(device)].start_sect; /* - * If this unit doesn't consist of virtual partitions then we clear + * If this unit doesn't consist of virtual partitions then we clear * the partn bits from the device number. */ - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & + if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) device &= ~(gd->max_p - 1); @@ -725,20 +731,20 @@ (sg_dev == device) && (sg_next_sect == sector_number) ) { - req = RING_GET_REQUEST(&blk_ring, - blk_ring.req_prod_pvt - 1); + req = RING_GET_REQUEST(&info->ring, + info->ring.req_prod_pvt - 1); bh = (struct buffer_head *)id; - + bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request; blk_shadow[req->id].request = (unsigned long)id; /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ref = gnttab_claim_grant_reference(&gref_head); ASSERT( ref != -ENOSPC ); gnttab_grant_foreign_access_ref( ref, - rdomid, + info->backend_id, buffer_ma >> PAGE_SHIFT, ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); @@ -757,7 +763,7 @@ return 0; } - else if ( RING_FULL(&blk_ring) ) + else if ( RING_FULL(&info->ring) ) { return 1; } @@ -774,7 +780,7 @@ } /* Fill out a communications ring structure. 
*/ - req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt); + req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); xid = GET_ID_FROM_FREELIST(); blk_shadow[xid].request = (unsigned long)id; @@ -782,15 +788,15 @@ req->id = xid; req->operation = operation; req->sector_number = (blkif_sector_t)sector_number; - req->handle = handle; + req->handle = handle; req->nr_segments = 1; /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ref = gnttab_claim_grant_reference(&gref_head); ASSERT( ref != -ENOSPC ); gnttab_grant_foreign_access_ref( ref, - rdomid, + info->backend_id, buffer_ma >> PAGE_SHIFT, ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); @@ -798,11 +804,11 @@ req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect); - /* Keep a private copy so we can reissue requests when recovering. */ + /* Keep a private copy so we can reissue requests when recovering. */ pickle_request(&blk_shadow[xid], req); - blk_ring.req_prod_pvt++; - + info->ring.req_prod_pvt++; + return 0; } @@ -817,13 +823,13 @@ struct buffer_head *bh, *next_bh; int rw, nsect, full, queued = 0; - DPRINTK("Entered do_blkif_request\n"); + DPRINTK("Entered do_blkif_request\n"); while ( !rq->plugged && !list_empty(&rq->queue_head)) { - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) + if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) goto out; - + DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", req, req->cmd, req->sector, req->current_nr_sectors, req->nr_sectors, req->bh); @@ -844,16 +850,16 @@ full = blkif_queue_request( (unsigned long)bh, - (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, + (rw == READ) ? 
BLKIF_OP_READ : BLKIF_OP_WRITE, bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); if ( full ) - { + { bh->b_reqnext = next_bh; pending_queues[nr_pending++] = rq; if ( unlikely(nr_pending >= MAX_PENDING) ) BUG(); - goto out; + goto out; } queued++; @@ -861,7 +867,7 @@ /* Dequeue the buffer head from the request. */ nsect = bh->b_size >> 9; bh = req->bh = next_bh; - + if ( bh != NULL ) { /* There's another buffer head to do. Update the request. */ @@ -891,27 +897,27 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) { - RING_IDX i, rp; - unsigned long flags; + RING_IDX i, rp; + unsigned long flags; struct buffer_head *bh, *next_bh; - - spin_lock_irqsave(&io_request_lock, flags); - - if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) ) + + spin_lock_irqsave(&io_request_lock, flags); + + if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) ) { spin_unlock_irqrestore(&io_request_lock, flags); return; } - rp = blk_ring.sring->rsp_prod; + rp = info->ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ - for ( i = blk_ring.rsp_cons; i != rp; i++ ) + for ( i = info->ring.rsp_cons; i != rp; i++ ) { unsigned long id; blkif_response_t *bret; - - bret = RING_GET_RESPONSE(&blk_ring, i); + + bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; bh = (struct buffer_head *)blk_shadow[id].request; @@ -943,8 +949,8 @@ } } - blk_ring.rsp_cons = i; - + info->ring.rsp_cons = i; + kick_pending_request_queues(); spin_unlock_irqrestore(&io_request_lock, flags); @@ -954,24 +960,24 @@ /***************************** COMMON CODE *******************************/ -static void blkif_free(void) +static void blkif_free(struct blkfront_info *info) { /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&blkif_io_lock); - blkif_state = BLKIF_STATE_DISCONNECTED; + info->connected = BLKIF_STATE_DISCONNECTED; spin_unlock_irq(&blkif_io_lock); /* Free resources associated with old device channel. 
*/ - if ( blk_ring.sring != NULL ) - { - free_page((unsigned long)blk_ring.sring); - blk_ring.sring = NULL; - } - unbind_evtchn_from_irqhandler(blkif_evtchn, NULL); - blkif_evtchn = 0; -} - -static void blkif_recover(void) + if ( info->ring.sring != NULL ) + { + free_page((unsigned long)info->ring.sring); + info->ring.sring = NULL; + } + unbind_evtchn_from_irqhandler(info->evtchn, NULL); + info->evtchn = 0; +} + +static void blkif_recover(struct blkfront_info *info) { int i; blkif_request_t *req; @@ -987,7 +993,7 @@ memset(&blk_shadow, 0, sizeof(blk_shadow)); for ( i = 0; i < BLK_RING_SIZE; i++ ) blk_shadow[i].req.id = i+1; - blk_shadow_free = blk_ring.req_prod_pvt; + blk_shadow_free = info->ring.req_prod_pvt; blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; /* Stage 3: Find pending requests and requeue them. */ @@ -999,7 +1005,7 @@ /* Grab a request slot and unpickle shadow state into it. */ req = RING_GET_REQUEST( - &blk_ring, blk_ring.req_prod_pvt); + &info->ring, info->ring.req_prod_pvt); unpickle_request(req, &copy[i]); /* We get a new request id, and must reset the shadow state. */ @@ -1012,7 +1018,7 @@ if ( req->frame_and_sects[j] & GRANTREF_INVALID ) gnttab_grant_foreign_access_ref( blkif_gref_from_fas(req->frame_and_sects[j]), - rdomid, + info->backend_id, blk_shadow[req->id].frame[j], rq_data_dir((struct request *) blk_shadow[req->id].request)); @@ -1020,32 +1026,31 @@ } blk_shadow[req->id].req = *req; - blk_ring.req_prod_pvt++; + info->ring.req_prod_pvt++; } kfree(copy); recovery = 0; - /* blk_ring->req_prod will be set when we flush_requests().*/ + /* info->ring->req_prod will be set when we flush_requests().*/ wmb(); /* Kicks things back into life. */ - flush_requests(); + flush_requests(info); /* Now safe to left other people use the interface. 
*/ - blkif_state = BLKIF_STATE_CONNECTED; -} - -static void blkif_connect(u16 evtchn, domid_t domid) + info->connected = BLKIF_STATE_CONNECTED; +} + +static void blkif_connect(struct blkfront_info *info, u16 evtchn) { int err = 0; - blkif_evtchn = evtchn; - rdomid = domid; + info->evtchn = evtchn; err = bind_evtchn_to_irqhandler( - blkif_evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL); + info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); if ( err != 0 ) { WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); @@ -1059,17 +1064,6 @@ { "" } }; -struct blkfront_info -{ - /* We watch the backend */ - struct xenbus_watch watch; - int vdevice; - u16 handle; - int connected; - struct xenbus_device *dev; - char *backend; -}; - static void watch_for_status(struct xenbus_watch *watch, const char *node) { struct blkfront_info *info; @@ -1081,35 +1075,33 @@ node += strlen(watch->node); /* FIXME: clean up when error on the other end. */ - if (info->connected) + if (info->connected == BLKIF_STATE_CONNECTED) return; - err = xenbus_gather(watch->node, + err = xenbus_gather(watch->node, "sectors", "%lu", &sectors, "info", "%u", &binfo, "sector-size", "%lu", &sector_size, NULL); if (err) { - xenbus_dev_error(info->dev, err, "reading backend fields"); + xenbus_dev_error(info->xbdev, err, "reading backend fields"); return; } - xlvbd_add(sectors, info->vdevice, info->handle, binfo, sector_size); - info->connected = 1; - - /* First to connect? blkif is now connected. */ - if (blkif_vbds_connected++ == 0) - blkif_state = BLKIF_STATE_CONNECTED; - - xenbus_dev_ok(info->dev); + xlvbd_add(sectors, info->vdevice, binfo, sector_size, info); + info->connected = BLKIF_STATE_CONNECTED; + + blkif_state = BLKIF_STATE_CONNECTED; + + xenbus_dev_ok(info->xbdev); /* Kick pending requests. 
*/ spin_lock_irq(&blkif_io_lock); - kick_pending_request_queues(); + kick_pending_request_queues(info); spin_unlock_irq(&blkif_io_lock); } -static int setup_blkring(struct xenbus_device *dev, unsigned int backend_id) +static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { blkif_sring_t *sring; evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound }; @@ -1121,25 +1113,28 @@ return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE); - - shmem_ref = gnttab_claim_grant_reference(&gref_head, - gref_terminal); - ASSERT(shmem_ref != -ENOSPC); - gnttab_grant_foreign_access_ref(shmem_ref, - backend_id, - virt_to_mfn(blk_ring.sring), - 0); - - op.u.alloc_unbound.dom = backend_id; + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + + err = gnttab_grant_foreign_access(info->backend_id, + virt_to_mfn(info->ring.sring), 0); + if (err == -ENOSPC) { + free_page((unsigned long)info->ring.sring); + info->ring.sring = 0; + xenbus_dev_error(dev, err, "granting access to ring page"); + return err; + } + info->grant_id = err; + + op.u.alloc_unbound.dom = info->backend_id; err = HYPERVISOR_event_channel_op(&op); if (err) { - free_page((unsigned long)blk_ring.sring); - blk_ring.sring = 0; + gnttab_end_foreign_access(info->grant_id, 0); + free_page((unsigned long)info->ring.sring); + info->ring.sring = 0; xenbus_dev_error(dev, err, "allocating event channel"); return err; } - blkif_connect(op.u.alloc_unbound.port, backend_id); + blkif_connect(info, op.u.alloc_unbound.port); return 0; } @@ -1149,11 +1144,11 @@ { char *backend; const char *message; - int err, backend_id; + int err; backend = NULL; err = xenbus_gather(dev->nodename, - "backend-id", "%i", &backend_id, + "backend-id", "%i", &info->backend_id, "backend", NULL, &backend, NULL); if (XENBUS_EXIST_ERR(err)) @@ -1168,12 +1163,10 @@ goto out; } - /* First device? We create shared ring, alloc event channel. 
*/ - if (blkif_vbds == 0) { - err = setup_blkring(dev, backend_id); - if (err) - goto out; - } + /* Create shared ring, alloc event channel. */ + err = setup_blkring(dev, info); + if (err) + goto out; err = xenbus_transaction_start(dev->nodename); if (err) { @@ -1181,13 +1174,13 @@ goto destroy_blkring; } - err = xenbus_printf(dev->nodename, "grant-id","%u", shmem_ref); + err = xenbus_printf(dev->nodename, "grant-id","%u", info->grant_id); if (err) { message = "writing grant-id"; goto abort_transaction; } err = xenbus_printf(dev->nodename, - "event-channel", "%u", blkif_evtchn); + "event-channel", "%u", info->evtchn); if (err) { message = "writing event-channel"; goto abort_transaction; @@ -1220,8 +1213,7 @@ /* Have to do this *outside* transaction. */ xenbus_dev_error(dev, err, "%s", message); destroy_blkring: - if (blkif_vbds == 0) - blkif_free(); + blkif_free(info); goto out; } @@ -1250,9 +1242,11 @@ xenbus_dev_error(dev, err, "allocating info structure"); return err; } - info->dev = dev; + info->xbdev = dev; info->vdevice = vdevice; - info->connected = 0; + info->connected = BLKIF_STATE_DISCONNECTED; + info->mi = NULL; + INIT_WORK(&info->work, blkif_restart_queue, (void *)info); /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); @@ -1266,7 +1260,6 @@ /* Call once in case entries already there. 
*/ watch_for_status(&info->watch, info->watch.node); - blkif_vbds++; return 0; } @@ -1277,15 +1270,13 @@ if (info->backend) unregister_xenbus_watch(&info->watch); - if (info->connected) { - xlvbd_del(info->handle); - blkif_vbds_connected--; - } + if (info->mi) + xlvbd_del(info); + + blkif_free(info); + kfree(info->backend); kfree(info); - - if (--blkif_vbds == 0) - blkif_free(); return 0; } @@ -1298,10 +1289,8 @@ kfree(info->backend); info->backend = NULL; - if (--blkif_vbds == 0) { - recovery = 1; - blkif_free(); - } + recovery = 1; + blkif_free(info); return 0; } @@ -1314,8 +1303,7 @@ /* FIXME: Check geometry hasn't changed here... */ err = talk_to_backend(dev, info); if (!err) { - if (blkif_vbds++ == 0) - blkif_recover(); + blkif_recover(info); } return err; } @@ -1363,11 +1351,6 @@ { int i; - /* A grant for every ring slot, plus one for the ring itself. */ - if (gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1, - &gref_head, &gref_terminal) < 0) - return 1; - if ( (xen_start_info.flags & SIF_INITDOMAIN) || (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) return 0; @@ -1391,6 +1374,6 @@ { int i; for ( i = 0; i < s->req.nr_segments; i++ ) - gnttab_release_grant_reference( - &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i])); -} + gnttab_free_grant_reference( + blkif_gref_from_fas(s->req.frame_and_sects[i])); +} diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Tue Aug 23 19:03:21 2005 @@ -46,6 +46,7 @@ #include <linux/major.h> #include <linux/devfs_fs_kernel.h> #include <asm-xen/hypervisor.h> +#include <asm-xen/xenbus.h> #include <asm-xen/xen-public/xen.h> #include <asm-xen/xen-public/io/blkif.h> #include <asm-xen/xen-public/io/ring.h> @@ -79,11 +80,20 @@ #define DPRINTK_IOCTL(_f, _a...) 
((void)0) #endif -struct xlbd_type_info { - int partn_shift; - int disks_per_major; - char *devname; - char *diskname; +struct xlbd_type_info +{ + int partn_shift; + int disks_per_major; + char *devname; + char *diskname; +}; + +struct xlbd_major_info +{ + int major; + int index; + int usage; + struct xlbd_type_info *type; }; /* @@ -91,26 +101,27 @@ * hang in private_data off the gendisk structure. We may end up * putting all kinds of interesting stuff here :-) */ -struct xlbd_major_info { - int major; - int index; - int usage; - struct xlbd_type_info *type; +struct blkfront_info +{ + struct xenbus_device *xbdev; + /* We watch the backend */ + struct xenbus_watch watch; + dev_t dev; + int vdevice; + blkif_vdev_t handle; + int connected; + char *backend; + int backend_id; + int grant_id; + blkif_front_ring_t ring; + unsigned int evtchn; + struct xlbd_major_info *mi; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + request_queue_t *rq; +#endif + struct work_struct work; + struct gnttab_free_callback callback; }; - -struct xlbd_disk_info { - int xd_device; - blkif_vdev_t handle; - struct xlbd_major_info *mi; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - struct xlbd_disk_info *next_waiting; - request_queue_t *rq; -#endif -}; - -typedef struct xen_block { - int usage; -} xen_block_t; extern spinlock_t blkif_io_lock; @@ -123,7 +134,7 @@ extern void do_blkif_request (request_queue_t *rq); /* Virtual block-device subsystem. 
*/ -int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle, - u16 info, u16 sector_size); -void xlvbd_del(blkif_vdev_t handle); +int xlvbd_add(blkif_sector_t capacity, int device, + u16 vdisk_info, u16 sector_size, struct blkfront_info *info); +void xlvbd_del(struct blkfront_info *info); #endif /* __XEN_DRIVERS_BLOCK_H__ */ diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue Aug 23 19:03:21 2005 @@ -43,325 +43,269 @@ #define NUM_SCSI_MAJORS 9 #define NUM_VBD_MAJORS 1 -struct lvdisk -{ - blkif_sector_t capacity; /* 0: Size in terms of 512-byte sectors. */ - blkif_vdev_t handle; /* 8: Device number (opaque 16 bit value). */ - u16 info; - dev_t dev; - struct list_head list; +static struct xlbd_type_info xlbd_ide_type = { + .partn_shift = 6, + .disks_per_major = 2, + .devname = "ide", + .diskname = "hd", }; -static struct xlbd_type_info xlbd_ide_type = { - .partn_shift = 6, - .disks_per_major = 2, - .devname = "ide", - .diskname = "hd", +static struct xlbd_type_info xlbd_scsi_type = { + .partn_shift = 4, + .disks_per_major = 16, + .devname = "sd", + .diskname = "sd", }; -static struct xlbd_type_info xlbd_scsi_type = { - .partn_shift = 4, - .disks_per_major = 16, - .devname = "sd", - .diskname = "sd", +static struct xlbd_type_info xlbd_vbd_type = { + .partn_shift = 4, + .disks_per_major = 16, + .devname = "xvd", + .diskname = "xvd", }; -static struct xlbd_type_info xlbd_vbd_type = { - .partn_shift = 4, - .disks_per_major = 16, - .devname = "xvd", - .diskname = "xvd", -}; - static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS + - NUM_VBD_MAJORS]; - -#define XLBD_MAJOR_IDE_START 0 -#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS) -#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) - -#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... 
XLBD_MAJOR_SCSI_START - 1 -#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1 -#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1 + NUM_VBD_MAJORS]; + +#define XLBD_MAJOR_IDE_START 0 +#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS) +#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) + +#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1 +#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1 +#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1 /* Information about our VBDs. */ #define MAX_VBDS 64 static LIST_HEAD(vbds_list); -#define MAJOR_XEN(dev) ((dev)>>8) -#define MINOR_XEN(dev) ((dev) & 0xff) - -static struct block_device_operations xlvbd_block_fops = -{ - .owner = THIS_MODULE, - .open = blkif_open, - .release = blkif_release, - .ioctl = blkif_ioctl, +static struct block_device_operations xlvbd_block_fops = +{ + .owner = THIS_MODULE, + .open = blkif_open, + .release = blkif_release, + .ioctl = blkif_ioctl, }; spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED; -static struct lvdisk *xlvbd_device_alloc(void) -{ - struct lvdisk *disk; - - disk = kmalloc(sizeof(*disk), GFP_KERNEL); - if (disk != NULL) { - memset(disk, 0, sizeof(*disk)); - INIT_LIST_HEAD(&disk->list); - } - return disk; -} - -static void xlvbd_device_free(struct lvdisk *disk) -{ - list_del(&disk->list); - kfree(disk); -} - -static struct xlbd_major_info *xlbd_alloc_major_info( - int major, int minor, int index) -{ - struct xlbd_major_info *ptr; - - ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL); - if (ptr == NULL) - return NULL; - - memset(ptr, 0, sizeof(struct xlbd_major_info)); - - ptr->major = major; - - switch (index) { - case XLBD_MAJOR_IDE_RANGE: - ptr->type = &xlbd_ide_type; - ptr->index = index - XLBD_MAJOR_IDE_START; - break; - case XLBD_MAJOR_SCSI_RANGE: - ptr->type = &xlbd_scsi_type; - ptr->index = 
index - XLBD_MAJOR_SCSI_START; - break; - case XLBD_MAJOR_VBD_RANGE: - ptr->type = &xlbd_vbd_type; - ptr->index = index - XLBD_MAJOR_VBD_START; - break; - } - - printk("Registering block device major %i\n", ptr->major); - if (register_blkdev(ptr->major, ptr->type->devname)) { - WPRINTK("can't get major %d with name %s\n", - ptr->major, ptr->type->devname); - kfree(ptr); - return NULL; - } - - devfs_mk_dir(ptr->type->devname); - major_info[index] = ptr; - return ptr; -} - -static struct xlbd_major_info *xlbd_get_major_info(int device) -{ - int major, minor, index; - - major = MAJOR_XEN(device); - minor = MINOR_XEN(device); - - switch (major) { - case IDE0_MAJOR: index = 0; break; - case IDE1_MAJOR: index = 1; break; - case IDE2_MAJOR: index = 2; break; - case IDE3_MAJOR: index = 3; break; - case IDE4_MAJOR: index = 4; break; - case IDE5_MAJOR: index = 5; break; - case IDE6_MAJOR: index = 6; break; - case IDE7_MAJOR: index = 7; break; - case IDE8_MAJOR: index = 8; break; - case IDE9_MAJOR: index = 9; break; - case SCSI_DISK0_MAJOR: index = 10; break; - case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR: - index = 11 + major - SCSI_DISK1_MAJOR; - break; - case SCSI_CDROM_MAJOR: index = 18; break; - default: index = 19; break; - } - - return ((major_info[index] != NULL) ? major_info[index] : - xlbd_alloc_major_info(major, minor, index)); -} - -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) -{ - request_queue_t *rq; - - rq = blk_init_queue(do_blkif_request, &blkif_io_lock); - if (rq == NULL) - return -1; - - elevator_init(rq, "noop"); - - /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_hardsect_size(rq, sector_size); - blk_queue_max_sectors(rq, 512); - - /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); - - /* Ensure a merged request will fit in a single I/O ring slot. 
*/ - blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); - blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); - - /* Make sure buffer addresses are sector-aligned. */ - blk_queue_dma_alignment(rq, 511); - - gd->queue = rq; - - return 0; -} - -static struct gendisk *xlvbd_alloc_gendisk( - struct xlbd_major_info *mi, int minor, blkif_sector_t capacity, - int device, blkif_vdev_t handle, u16 info, u16 sector_size) -{ - struct gendisk *gd; - struct xlbd_disk_info *di; - int nr_minors = 1; - - di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL); - if (di == NULL) - return NULL; - memset(di, 0, sizeof(*di)); - di->mi = mi; - di->xd_device = device; - di->handle = handle; - - if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0) - nr_minors = 1 << mi->type->partn_shift; - - gd = alloc_disk(nr_minors); - if (gd == NULL) - goto out; - - if (nr_minors > 1) - sprintf(gd->disk_name, "%s%c", mi->type->diskname, - 'a' + mi->index * mi->type->disks_per_major + - (minor >> mi->type->partn_shift)); - else - sprintf(gd->disk_name, "%s%c%d", mi->type->diskname, - 'a' + mi->index * mi->type->disks_per_major + - (minor >> mi->type->partn_shift), - minor & ((1 << mi->type->partn_shift) - 1)); - - gd->major = mi->major; - gd->first_minor = minor; - gd->fops = &xlvbd_block_fops; - gd->private_data = di; - set_capacity(gd, capacity); - - if (xlvbd_init_blk_queue(gd, sector_size)) { - del_gendisk(gd); - goto out; - } - - di->rq = gd->queue; - - if (info & VDISK_READONLY) - set_disk_ro(gd, 1); - - if (info & VDISK_REMOVABLE) - gd->flags |= GENHD_FL_REMOVABLE; - - if (info & VDISK_CDROM) - gd->flags |= GENHD_FL_CD; - - add_disk(gd); - - return gd; - -out: - kfree(di); - return NULL; -} - -int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle, - u16 info, u16 sector_size) -{ - struct lvdisk *new; - struct block_device *bd; - struct gendisk *gd; - struct xlbd_major_info *mi; - - mi = xlbd_get_major_info(device); - if (mi == NULL) - return -EPERM; - 
- new = xlvbd_device_alloc(); - if (new == NULL) - return -ENOMEM; - new->capacity = capacity; - new->info = info; - new->handle = handle; - new->dev = MKDEV(MAJOR_XEN(device), MINOR_XEN(device)); - - bd = bdget(new->dev); - if (bd == NULL) - goto out; - - gd = xlvbd_alloc_gendisk(mi, MINOR_XEN(device), capacity, device, handle, - info, sector_size); - if (gd == NULL) - goto out_bd; - - list_add(&new->list, &vbds_list); -out_bd: - bdput(bd); -out: - return 0; -} - -static int xlvbd_device_del(struct lvdisk *disk) -{ - struct block_device *bd; - struct gendisk *gd; - struct xlbd_disk_info *di; - int ret = 0, unused; - request_queue_t *rq; - - bd = bdget(disk->dev); - if (bd == NULL) - return -1; - - gd = get_gendisk(disk->dev, &unused); - di = gd->private_data; - -#if 0 /* This is wrong: hda and hdb share same major, for example. */ - if (di->mi->usage != 0) { - WPRINTK("disk removal failed: used [dev=%x]\n", disk->dev); - ret = -1; - goto out; - } -#endif - - rq = gd->queue; - del_gendisk(gd); - put_disk(gd); - blk_cleanup_queue(rq); - - xlvbd_device_free(disk); - bdput(bd); - return ret; -} - -void xlvbd_del(blkif_vdev_t handle) -{ - struct lvdisk *i; - - list_for_each_entry(i, &vbds_list, list) { - if (i->handle == handle) { - xlvbd_device_del(i); - return; - } +static struct xlbd_major_info * +xlbd_alloc_major_info(int major, int minor, int index) +{ + struct xlbd_major_info *ptr; + + ptr = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL); + if (ptr == NULL) + return NULL; + + memset(ptr, 0, sizeof(struct xlbd_major_info)); + + ptr->major = major; + + switch (index) { + case XLBD_MAJOR_IDE_RANGE: + ptr->type = &xlbd_ide_type; + ptr->index = index - XLBD_MAJOR_IDE_START; + break; + case XLBD_MAJOR_SCSI_RANGE: + ptr->type = &xlbd_scsi_type; + ptr->index = index - XLBD_MAJOR_SCSI_START; + break; + case XLBD_MAJOR_VBD_RANGE: + ptr->type = &xlbd_vbd_type; + ptr->index = index - XLBD_MAJOR_VBD_START; + break; } - BUG(); -} + + printk("Registering block device major 
%i\n", ptr->major); + if (register_blkdev(ptr->major, ptr->type->devname)) { + WPRINTK("can't get major %d with name %s\n", + ptr->major, ptr->type->devname); + kfree(ptr); + return NULL; + } + + devfs_mk_dir(ptr->type->devname); + major_info[index] = ptr; + return ptr; +} + +static struct xlbd_major_info * +xlbd_get_major_info(int vdevice) +{ + struct xlbd_major_info *mi; + int major, minor, index; + + major = BLKIF_MAJOR(vdevice); + minor = BLKIF_MINOR(vdevice); + + switch (major) { + case IDE0_MAJOR: index = 0; break; + case IDE1_MAJOR: index = 1; break; + case IDE2_MAJOR: index = 2; break; + case IDE3_MAJOR: index = 3; break; + case IDE4_MAJOR: index = 4; break; + case IDE5_MAJOR: index = 5; break; + case IDE6_MAJOR: index = 6; break; + case IDE7_MAJOR: index = 7; break; + case IDE8_MAJOR: index = 8; break; + case IDE9_MAJOR: index = 9; break; + case SCSI_DISK0_MAJOR: index = 10; break; + case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR: + index = 11 + major - SCSI_DISK1_MAJOR; + break; + case SCSI_CDROM_MAJOR: index = 18; break; + default: index = 19; break; + } + + mi = ((major_info[index] != NULL) ? major_info[index] : + xlbd_alloc_major_info(major, minor, index)); + mi->usage++; + return mi; +} + +static void +xlbd_put_major_info(struct xlbd_major_info *mi) +{ + mi->usage--; + /* XXX: release major if 0 */ +} + +static int +xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) +{ + request_queue_t *rq; + + rq = blk_init_queue(do_blkif_request, &blkif_io_lock); + if (rq == NULL) + return -1; + + elevator_init(rq, "noop"); + + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_hardsect_size(rq, sector_size); + blk_queue_max_sectors(rq, 512); + + /* Each segment in a request is up to an aligned page in size. */ + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); + blk_queue_max_segment_size(rq, PAGE_SIZE); + + /* Ensure a merged request will fit in a single I/O ring slot. 
*/ + blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + + /* Make sure buffer addresses are sector-aligned. */ + blk_queue_dma_alignment(rq, 511); + + gd->queue = rq; + + return 0; +} + +static int +xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice, + u16 vdisk_info, u16 sector_size, + struct blkfront_info *info) +{ + struct gendisk *gd; + struct xlbd_major_info *mi; + int nr_minors = 1; + int err = -ENODEV; + + mi = xlbd_get_major_info(vdevice); + if (mi == NULL) + goto out; + info->mi = mi; + + if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0) + nr_minors = 1 << mi->type->partn_shift; + + gd = alloc_disk(nr_minors); + if (gd == NULL) + goto out; + + if (nr_minors > 1) + sprintf(gd->disk_name, "%s%c", mi->type->diskname, + 'a' + mi->index * mi->type->disks_per_major + + (minor >> mi->type->partn_shift)); + else + sprintf(gd->disk_name, "%s%c%d", mi->type->diskname, + 'a' + mi->index * mi->type->disks_per_major + + (minor >> mi->type->partn_shift), + minor & ((1 << mi->type->partn_shift) - 1)); + + gd->major = mi->major; + gd->first_minor = minor; + gd->fops = &xlvbd_block_fops; + gd->private_data = info; + set_capacity(gd, capacity); + + if (xlvbd_init_blk_queue(gd, sector_size)) { + del_gendisk(gd); + goto out; + } + + info->rq = gd->queue; + + if (vdisk_info & VDISK_READONLY) + set_disk_ro(gd, 1); + + if (vdisk_info & VDISK_REMOVABLE) + gd->flags |= GENHD_FL_REMOVABLE; + + if (vdisk_info & VDISK_CDROM) + gd->flags |= GENHD_FL_CD; + + add_disk(gd); + + return 0; + + out: + if (mi) + xlbd_put_major_info(mi); + return err; +} + +int +xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info, + u16 sector_size, struct blkfront_info *info) +{ + struct block_device *bd; + int err = 0; + + info->dev = MKDEV(BLKIF_MAJOR(vdevice), BLKIF_MINOR(vdevice)); + + bd = bdget(info->dev); + if (bd == NULL) + return -ENODEV; + + err = 
xlvbd_alloc_gendisk(BLKIF_MINOR(vdevice), capacity, vdevice, + vdisk_info, sector_size, info); + + bdput(bd); + return err; +} + +void +xlvbd_del(struct blkfront_info *info) +{ + struct block_device *bd; + struct gendisk *gd; + int unused; + request_queue_t *rq; + + bd = bdget(info->dev); + if (bd == NULL) + return; + + gd = get_gendisk(info->dev, &unused); + rq = gd->queue; + + del_gendisk(gd); + put_disk(gd); + xlbd_put_major_info(info->mi); + info->mi = NULL; + blk_cleanup_queue(rq); + + bdput(bd); +} diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Aug 23 19:03:21 2005 @@ -102,12 +102,12 @@ #endif #ifdef CONFIG_XEN_NETDEV_GRANT_TX -static grant_ref_t gref_tx_head, gref_tx_terminal; +static grant_ref_t gref_tx_head; static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1]; #endif #ifdef CONFIG_XEN_NETDEV_GRANT_RX -static grant_ref_t gref_rx_head, gref_rx_terminal; +static grant_ref_t gref_rx_head; static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1]; #endif @@ -441,8 +441,8 @@ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id; #ifdef CONFIG_XEN_NETDEV_GRANT_RX - if (unlikely((ref = gnttab_claim_grant_reference(&gref_rx_head, - gref_rx_terminal)) < 0)) { + ref = gnttab_claim_grant_reference(&gref_rx_head); + if (unlikely(ref < 0)) { printk(KERN_ALERT "#### netfront can't claim rx reference\n"); BUG(); } @@ -537,8 +537,8 @@ tx->id = id; #ifdef CONFIG_XEN_NETDEV_GRANT_TX - if (unlikely((ref = gnttab_claim_grant_reference(&gref_tx_head, - gref_tx_terminal)) < 0)) { + ref = gnttab_claim_grant_reference(&gref_tx_head); + if (unlikely(ref < 0)) { printk(KERN_ALERT "#### netfront can't claim tx grant reference\n"); BUG(); } @@ -929,8 +929,7 @@ msg->handle = np->handle; msg->tx_shmem_frame = virt_to_mfn(np->tx); #ifdef CONFIG_XEN_NETDEV_GRANT_TX - msg->tx_shmem_ref = 
(u32)gnttab_claim_grant_reference(&gref_tx_head, - gref_tx_terminal); + msg->tx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_tx_head); if(msg->tx_shmem_ref < 0) { printk(KERN_ALERT "#### netfront can't claim tx_shmem reference\n"); BUG(); @@ -941,8 +940,7 @@ msg->rx_shmem_frame = virt_to_mfn(np->rx); #ifdef CONFIG_XEN_NETDEV_GRANT_RX - msg->rx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_rx_head, - gref_rx_terminal); + msg->rx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_rx_head); if(msg->rx_shmem_ref < 0) { printk(KERN_ALERT "#### netfront can't claim rx_shmem reference\n"); BUG(); @@ -1420,7 +1418,7 @@ #ifdef CONFIG_XEN_NETDEV_GRANT_TX /* A grant for every ring slot, plus one for the ring itself */ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE + 1, - &gref_tx_head, &gref_tx_terminal) < 0) { + &gref_tx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); return 1; } @@ -1429,7 +1427,7 @@ #ifdef CONFIG_XEN_NETDEV_GRANT_RX /* A grant for every ring slot, plus one for the ring itself */ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE + 1, - &gref_rx_head, &gref_rx_terminal) < 0) { + &gref_rx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); return 1; } @@ -1457,10 +1455,10 @@ static void netif_exit(void) { #ifdef CONFIG_XEN_NETDEV_GRANT_TX - gnttab_free_grant_references(NETIF_TX_RING_SIZE + 1, gref_tx_head); + gnttab_free_grant_references(gref_tx_head); #endif #ifdef CONFIG_XEN_NETDEV_GRANT_RX - gnttab_free_grant_references(NETIF_RX_RING_SIZE + 1, gref_rx_head); + gnttab_free_grant_references(gref_rx_head); #endif } diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue Aug 23 19:03:21 2005 @@ -167,7 +167,7 @@ if (ret) goto batch_err; - u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot); + u.val = 
pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot)); u.ptr = ptep; if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) ) diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Tue Aug 23 19:03:21 2005 @@ -60,9 +60,13 @@ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +#define INVALID_P2M_ENTRY (~0U) +#define FOREIGN_FRAME(m) ((m) | 0x80000000U) extern unsigned int *phys_to_machine_mapping; -#define pfn_to_mfn(_pfn) ((unsigned long)(phys_to_machine_mapping[(_pfn)])) -#define mfn_to_pfn(_mfn) ((unsigned long)(machine_to_phys_mapping[(_mfn)])) +#define pfn_to_mfn(pfn) \ +((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL) +#define mfn_to_pfn(mfn) \ +((unsigned long)machine_to_phys_mapping[(unsigned int)(mfn)]) /* Definitions for machine and pseudophysical addresses. */ #ifdef CONFIG_X86_PAE diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Tue Aug 23 19:03:21 2005 @@ -63,17 +63,15 @@ * * NB2. When deliberately mapping foreign pages into the p2m table, you *must* * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we - * require. In all the cases we care about, the high bit gets shifted out - * (e.g., phys_to_machine()) so behaviour there is correct. + * require. In all the cases we care about, the FOREIGN_FRAME bit is + * masked (e.g., pfn_to_mfn()) so behaviour there is correct. 
*/ -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) #define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) #define pte_pfn(_pte) \ ({ \ unsigned long mfn = pte_mfn(_pte); \ unsigned long pfn = mfn_to_pfn(mfn); \ - if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \ + if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\ pfn = max_mapnr; /* special: force !pfn_valid() */ \ pfn; \ }) diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Tue Aug 23 19:03:21 2005 @@ -150,15 +150,13 @@ return !pte.pte_low && !pte.pte_high; } -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) #define pte_mfn(_pte) ( ((_pte).pte_low >> PAGE_SHIFT) |\ (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)) ) #define pte_pfn(_pte) \ ({ \ unsigned long mfn = pte_mfn(_pte); \ unsigned long pfn = mfn_to_pfn(mfn); \ - if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \ + if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\ pfn = max_mapnr; /* special: force !pfn_valid() */ \ pfn; \ }) diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Tue Aug 23 19:03:21 2005 @@ -62,9 +62,13 @@ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +#define INVALID_P2M_ENTRY (~0U) +#define FOREIGN_FRAME(m) ((m) | 0x80000000U) extern u32 *phys_to_machine_mapping; -#define pfn_to_mfn(_pfn) ((unsigned long) phys_to_machine_mapping[(unsigned int)(_pfn)]) -#define mfn_to_pfn(_mfn) ((unsigned long) machine_to_phys_mapping[(unsigned int)(_mfn)]) +#define 
pfn_to_mfn(pfn) \ +((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL) +#define mfn_to_pfn(mfn) \ +((unsigned long)machine_to_phys_mapping[(unsigned int)(mfn)]) /* Definitions for machine and pseudophysical addresses. */ typedef unsigned long paddr_t; diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Tue Aug 23 19:03:21 2005 @@ -300,17 +300,15 @@ * * NB2. When deliberately mapping foreign pages into the p2m table, you *must* * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we - * require. In all the cases we care about, the high bit gets shifted out - * (e.g., phys_to_machine()) so behaviour there is correct. - */ -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) + * require. In all the cases we care about, the FOREIGN_FRAME bit is + * masked (e.g., pfn_to_mfn()) so behaviour there is correct. 
+ */ #define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT) #define pte_pfn(_pte) \ ({ \ unsigned long mfn = pte_mfn(_pte); \ unsigned pfn = mfn_to_pfn(mfn); \ - if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \ + if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\ pfn = max_mapnr; /* special: force !pfn_valid() */ \ pfn; \ }) diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/include/asm-xen/gnttab.h --- a/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Mon Aug 22 18:37:48 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Tue Aug 23 19:03:21 2005 @@ -19,54 +19,46 @@ /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ #define NR_GRANT_FRAMES 4 -#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) -int -gnttab_grant_foreign_access( - domid_t domid, unsigned long frame, int readonly); +struct gnttab_free_callback { + struct gnttab_free_callback *next; + void (*fn)(void *); + void *arg; + u16 count; +}; -void -gnttab_end_foreign_access( - grant_ref_t ref, int readonly); +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, + int readonly); -int -gnttab_grant_foreign_transfer( - domid_t domid, unsigned long pfn); +void gnttab_end_foreign_access(grant_ref_t ref, int readonly); -unsigned long -gnttab_end_foreign_transfer( - grant_ref_t ref); +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); -int -gnttab_query_foreign_access( - grant_ref_t ref ); +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); + +int gnttab_query_foreign_access(grant_ref_t ref); /* * operations on reserved batches of grant references */ -int -gnttab_alloc_grant_references( - u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal ); +int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head); -void -gnttab_free_grant_references( - u16 count, grant_ref_t private_head ); +void gnttab_free_grant_reference(grant_ref_t ref); -int 
-gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal -); +void gnttab_free_grant_references(grant_ref_t head); -void -gnttab_release_grant_reference( - grant_ref_t *private_head, grant_ref_t release ); +int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); -void -gnttab_grant_foreign_access_ref( - grant_ref_t ref, domid_t domid, unsigned long frame, int readonly); +void gnttab_release_grant_reference(grant_ref_t *private_head, + grant_ref_t release); -void -gnttab_grant_foreign_transfer_ref( - grant_ref_t, domid_t domid, unsigned long pfn); +void gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, u16 count); +void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly); + +void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, + unsigned long pfn); #endif /* __ASM_GNTTAB_H__ */ diff -r cd984b3478f6 -r cc5f88b719d0 tools/Makefile --- a/tools/Makefile Mon Aug 22 18:37:48 2005 +++ b/tools/Makefile Tue Aug 23 19:03:21 2005 @@ -14,6 +14,7 @@ SUBDIRS += firmware SUBDIRS += security SUBDIRS += console +SUBDIRS += xenstat .PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean diff -r cd984b3478f6 -r cc5f88b719d0 tools/Rules.mk --- a/tools/Rules.mk Mon Aug 22 18:37:48 2005 +++ b/tools/Rules.mk Tue Aug 23 19:03:21 2005 @@ -6,6 +6,7 @@ XEN_LIBXC = $(XEN_ROOT)/tools/libxc XEN_XCS = $(XEN_ROOT)/tools/xcs XEN_XENSTORE = $(XEN_ROOT)/tools/xenstore +XEN_LIBXENSTAT = $(XEN_ROOT)/tools/xenstat/libxenstat/src ifeq ($(XEN_TARGET_ARCH),x86_32) CFLAGS += -m32 -march=i686 diff -r cd984b3478f6 -r cc5f88b719d0 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Mon Aug 22 18:37:48 2005 +++ b/tools/python/xen/xm/main.py Tue Aug 23 19:03:21 2005 @@ -49,6 +49,7 @@ restore <File> create a domain from a saved state file save <DomId> <File> save domain state (and config) to file shutdown <DomId> shutdown a domain + top monitor 
system and domains in real-time unpause <DomId> unpause a paused domain For a complete list of subcommands run 'xm help --long' @@ -87,6 +88,7 @@ dmesg [--clear] read or clear Xen's message buffer info get information about the xen host log print the xend log + top monitor system and domains in real-time Scheduler Commands: bvt <options> set BVT scheduler parameters @@ -457,6 +459,9 @@ os.execvp('/usr/libexec/xen/xenconsole', cmd.split()) console = sxp.child(info, "console") +def xm_top(args): + os.execv('/usr/sbin/xentop', ['/usr/sbin/xentop']) + def xm_dmesg(args): gopts = Opts(use="""[-c|--clear] @@ -545,6 +550,8 @@ commands = { # console commands "console": xm_console, + # xenstat commands + "top": xm_top, # domain commands "domid": xm_domid, "domname": xm_domname, diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Mon Aug 22 18:37:48 2005 +++ b/xen/arch/x86/io_apic.c Tue Aug 23 19:03:21 2005 @@ -1751,8 +1751,30 @@ pin = (address - 0x10) >> 1; + *(u32 *)&rte = val; rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - *(int *)&rte = val; + + /* + * What about weird destination types? + * SMI: Ignore? Ought to be set up by the BIOS. + * NMI: Ignore? Watchdog functionality is Xen's concern. + * INIT: Definitely ignore: probably a guest OS bug. + * ExtINT: Ignore? Linux only asserts this at start of day. + * For now, print a message and return an error. We can fix up on demand. + */ + if ( rte.delivery_mode > dest_LowestPrio ) + { + printk("ERROR: Attempt to write weird IOAPIC destination mode!\n"); + printk(" APIC=%d/%d, lo-reg=%x\n", apicid, pin, val); + return -EINVAL; + } + + /* + * The guest does not know physical APIC arrangement (flat vs. cluster). + * Apply genapic conventions for this platform. 
+ */ + rte.delivery_mode = INT_DELIVERY_MODE; + rte.dest_mode = INT_DEST_MODE; if ( rte.vector >= FIRST_DEVICE_VECTOR ) { diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Mon Aug 22 18:37:48 2005 +++ b/xen/arch/x86/mm.c Tue Aug 23 19:03:21 2005 @@ -444,7 +444,7 @@ if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) ) { - MEM_LOG("Bad L1 flags %x\n", l1e_get_flags(l1e) & L1_DISALLOW_MASK); + MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK); return 0; } @@ -490,7 +490,7 @@ if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) ) { - MEM_LOG("Bad L2 flags %x\n", l2e_get_flags(l2e) & L2_DISALLOW_MASK); + MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK); return 0; } @@ -523,7 +523,7 @@ if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) ) { - MEM_LOG("Bad L3 flags %x\n", l3e_get_flags(l3e) & L3_DISALLOW_MASK); + MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK); return 0; } @@ -557,7 +557,7 @@ if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) ) { - MEM_LOG("Bad L4 flags %x\n", l4e_get_flags(l4e) & L4_DISALLOW_MASK); + MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK); return 0; } @@ -1025,7 +1025,7 @@ unlikely(o != l1e_get_intpte(ol1e)) ) { MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte - ": saw %" PRIpte "\n", + ": saw %" PRIpte, l1e_get_intpte(ol1e), l1e_get_intpte(nl1e), o); @@ -1051,7 +1051,7 @@ { if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) ) { - MEM_LOG("Bad L1 flags %x\n", + MEM_LOG("Bad L1 flags %x", l1e_get_flags(nl1e) & L1_DISALLOW_MASK); return 0; } @@ -1113,7 +1113,7 @@ { if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) ) { - MEM_LOG("Bad L2 flags %x\n", + MEM_LOG("Bad L2 flags %x", l2e_get_flags(nl2e) & L2_DISALLOW_MASK); return 0; } @@ -1175,7 +1175,7 @@ { if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) ) { - MEM_LOG("Bad L3 flags %x\n", + MEM_LOG("Bad L3 flags %x", l3e_get_flags(nl3e) & L3_DISALLOW_MASK); return 0; } @@ -1237,7 
+1237,7 @@ { if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) ) { - MEM_LOG("Bad L4 flags %x\n", + MEM_LOG("Bad L4 flags %x", l4e_get_flags(nl4e) & L4_DISALLOW_MASK); return 0; } @@ -1598,7 +1598,7 @@ percpu_info[cpu].foreign = dom_io; break; default: - MEM_LOG("Dom %u cannot set foreign dom\n", d->domain_id); + MEM_LOG("Dom %u cannot set foreign dom", d->domain_id); okay = 0; break; } @@ -1831,7 +1831,7 @@ case MMUEXT_FLUSH_CACHE: if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) ) { - MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n"); + MEM_LOG("Non-physdev domain tried to FLUSH_CACHE."); okay = 0; } else @@ -1845,7 +1845,7 @@ if ( shadow_mode_external(d) ) { MEM_LOG("ignoring SET_LDT hypercall from external " - "domain %u\n", d->domain_id); + "domain %u", d->domain_id); okay = 0; break; } @@ -1916,7 +1916,7 @@ unlikely(IS_XEN_HEAP_FRAME(page)) ) { MEM_LOG("Transferee has no reservation headroom (%d,%d), or " - "page is in Xen heap (%lx), or dom is dying (%ld).\n", + "page is in Xen heap (%lx), or dom is dying (%ld).", e->tot_pages, e->max_pages, op.mfn, e->domain_flags); okay = 0; goto reassign_fail; @@ -1937,7 +1937,7 @@ unlikely(_nd != _d) ) { MEM_LOG("Bad page values %lx: ed=%p(%u), sd=%p," - " caf=%08x, taf=%" PRtype_info "\n", + " caf=%08x, taf=%" PRtype_info, page_to_pfn(page), d, d->domain_id, unpickle_domptr(_nd), x, page->u.inuse.type_info); okay = 0; @@ -2301,7 +2301,7 @@ if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) ) { - DPRINTK("Grant map attempted to update a non-L1 page\n"); + MEM_LOG("Grant map attempted to update a non-L1 page"); rc = GNTST_general_error; goto failed; } @@ -2363,7 +2363,7 @@ if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) ) { - DPRINTK("Grant map attempted to update a non-L1 page\n"); + MEM_LOG("Grant map attempted to update a non-L1 page"); rc = GNTST_general_error; goto failed; } @@ 
-2378,7 +2378,7 @@ /* Check that the virtual address supplied is actually mapped to frame. */ if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) ) { - DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n", + MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx", (unsigned long)l1e_get_intpte(ol1e), addr, frame); put_page_type(page); rc = GNTST_general_error; @@ -2388,7 +2388,7 @@ /* Delete pagetable entry. */ if ( unlikely(__put_user(0, (intpte_t *)va))) { - DPRINTK("Cannot delete PTE entry at %p.\n", va); + MEM_LOG("Cannot delete PTE entry at %p", va); put_page_type(page); rc = GNTST_general_error; goto failed; @@ -2452,7 +2452,7 @@ if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) ) { - DPRINTK("Could not find PTE entry for address %lx\n", addr); + MEM_LOG("Could not find PTE entry for address %lx", addr); return GNTST_general_error; } @@ -2462,7 +2462,7 @@ */ if ( unlikely(l1e_get_pfn(ol1e) != frame) ) { - DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n", + MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx", l1e_get_pfn(ol1e), addr, frame); return GNTST_general_error; } @@ -2470,7 +2470,7 @@ /* Delete pagetable entry. */ if ( unlikely(__put_user(0, &pl1e->l1)) ) { - DPRINTK("Cannot delete PTE entry at %p.\n", (unsigned long *)pl1e); + MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); return GNTST_general_error; } @@ -2930,7 +2930,7 @@ if ( unlikely(!get_page_from_l1e(nl1e, d)) ) { - MEM_LOG("ptwr: Could not re-validate l1 page\n"); + MEM_LOG("ptwr: Could not re-validate l1 page"); /* * Make the remaining p.t's consistent before crashing, so the * reference counts are correct. @@ -3056,7 +3056,7 @@ /* Aligned access only, thank you. 
*/ if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) ) { - MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)\n", + MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)", bytes, addr); return X86EMUL_UNHANDLEABLE; } @@ -3089,7 +3089,7 @@ if (__copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)], sizeof(pte))) { - MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n"); + MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table"); return X86EMUL_UNHANDLEABLE; } @@ -3102,7 +3102,7 @@ (page_get_owner(page) != d) ) { MEM_LOG("ptwr_emulate: Page is mistyped or bad pte " - "(%lx, %" PRtype_info ")\n", + "(%lx, %" PRtype_info ")", l1e_get_pfn(pte), page->u.inuse.type_info); return X86EMUL_UNHANDLEABLE; } diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/vmx.c --- a/xen/arch/x86/vmx.c Mon Aug 22 18:37:48 2005 +++ b/xen/arch/x86/vmx.c Tue Aug 23 19:03:21 2005 @@ -1712,8 +1712,6 @@ default: __vmx_bug(&regs); /* should not happen */ } - - return; } asmlinkage void load_cr2(void) diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/vmx_io.c --- a/xen/arch/x86/vmx_io.c Mon Aug 22 18:37:48 2005 +++ b/xen/arch/x86/vmx_io.c Tue Aug 23 19:03:21 2005 @@ -631,7 +631,7 @@ return ((eflags & X86_EFLAGS_IF) == 0); } -asmlinkage void vmx_intr_assist() +asmlinkage void vmx_intr_assist(void) { int intr_type = 0; int highest_vector; @@ -714,8 +714,6 @@ /* We can't resume the guest if we're waiting on I/O */ ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)); - - /* We always check for interrupts before resuming guest */ } #endif /* CONFIG_VMX */ diff -r cd984b3478f6 -r cc5f88b719d0 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Mon Aug 22 18:37:48 2005 +++ b/xen/arch/x86/x86_32/traps.c Tue Aug 23 19:03:21 2005 @@ -1,5 +1,6 @@ #include <xen/config.h> +#include <xen/domain_page.h> #include <xen/init.h> #include <xen/sched.h> #include <xen/lib.h> @@ -86,24 +87,33 @@ void show_page_walk(unsigned long addr) { - l2_pgentry_t 
pmd; - l1_pgentry_t *pte; - - if ( addr < PAGE_OFFSET ) - return; + unsigned long pfn = read_cr3() >> PAGE_SHIFT; + intpte_t *ptab, ent; printk("Pagetable walk from %08lx:\n", addr); - - pmd = idle_pg_table_l2[l2_linear_offset(addr)]; - printk(" L2 = %"PRIpte" %s\n", l2e_get_intpte(pmd), - (l2e_get_flags(pmd) & _PAGE_PSE) ? "(2/4MB)" : ""); - if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) || - (l2e_get_flags(pmd) & _PAGE_PSE) ) - return; - - pte = __va(l2e_get_paddr(pmd)); - pte += l1_table_offset(addr); - printk(" L1 = %"PRIpte"\n", l1e_get_intpte(*pte)); + +#ifdef CONFIG_X86_PAE + ptab = map_domain_page(pfn); + ent = ptab[l3_table_offset(addr)]; + printk(" L3 = %"PRIpte"\n", ent); + unmap_domain_page(ptab); + if ( !(ent & _PAGE_PRESENT) ) + return; + pfn = ent >> PAGE_SHIFT; +#endif + + ptab = map_domain_page(pfn); + ent = ptab[l2_table_offset(addr)]; + printk(" L2 = %"PRIpte" %s\n", ent, (ent & _PAGE_PSE) ? "(PSE)" : ""); + unmap_domain_page(ptab); + if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) ) + return; + pfn = ent >> PAGE_SHIFT; + + ptab = map_domain_page(ent >> PAGE_SHIFT); + ent = ptab[l2_table_offset(addr)]; + printk(" L1 = %"PRIpte"\n", ent); + unmap_domain_page(ptab); } #define DOUBLEFAULT_STACK_SIZE 1024 diff -r cd984b3478f6 -r cc5f88b719d0 xen/include/asm-x86/vmx.h --- a/xen/include/asm-x86/vmx.h Mon Aug 22 18:37:48 2005 +++ b/xen/include/asm-x86/vmx.h Tue Aug 23 19:03:21 2005 @@ -31,7 +31,7 @@ extern void vmx_asm_vmexit_handler(struct cpu_user_regs); extern void vmx_asm_do_resume(void); extern void vmx_asm_do_launch(void); -extern void vmx_intr_assist(); +extern void vmx_intr_assist(void); extern void arch_vmx_do_launch(struct vcpu *); extern void arch_vmx_do_resume(struct vcpu *); @@ -355,7 +355,7 @@ } /* Make sure that xen intercepts any FP accesses from current */ -static inline void vmx_stts() +static inline void vmx_stts(void) { unsigned long cr0; diff -r cd984b3478f6 -r cc5f88b719d0 xen/include/public/io/blkif.h --- 
a/xen/include/public/io/blkif.h Mon Aug 22 18:37:48 2005 +++ b/xen/include/public/io/blkif.h Tue Aug 23 19:03:21 2005 @@ -58,6 +58,9 @@ #define BLKIF_RSP_ERROR -1 /* non-specific 'error' */ #define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */ +#define BLKIF_MAJOR(dev) ((dev)>>8) +#define BLKIF_MINOR(dev) ((dev) & 0xff) + /* * Generate blkif ring structures and types. */ diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/Makefile --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/Makefile Tue Aug 23 19:03:21 2005 @@ -0,0 +1,13 @@ +XEN_ROOT = ../.. +include $(XEN_ROOT)/tools/Rules.mk + +SUBDIRS := +SUBDIRS += libxenstat +SUBDIRS += xentop + +.PHONY: all install clean + +all install clean: + @set -e; for subdir in $(SUBDIRS); do \ + $(MAKE) -C $$subdir $@; \ + done diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/COPYING --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/libxenstat/COPYING Tue Aug 23 19:03:21 2005 @@ -0,0 +1,510 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. 
You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. 
+ + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. 
In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. 
A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. 
You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
+ + diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/Makefile --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/libxenstat/Makefile Tue Aug 23 19:03:21 2005 @@ -0,0 +1,142 @@ +# libxenstat: statistics-collection library for Xen +# Copyright (C) International Business Machines Corp., 2005 +# Author: Josh Triplett <josht@xxxxxxxxxx> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +XEN_ROOT=../../.. +include $(XEN_ROOT)/tools/Rules.mk +LINUX_ROOT := $(XEN_ROOT)/linux-2.6-xen-sparse + +INSTALL = install +INSTALL_PROG = $(INSTALL) -m0755 -D +INSTALL_DATA = $(INSTALL) -m0644 -D + +prefix=/usr +includedir=$(prefix)/include +libdir=$(prefix)/lib + +LDCONFIG=ldconfig +MAKE_LINK=ln -sf + +MAJOR=0 +MINOR=0 + +LIB=src/libxenstat.a +SHLIB=src/libxenstat.so.$(MAJOR).$(MINOR) +SHLIB_LINKS=src/libxenstat.so.$(MAJOR) src/libxenstat.so +OBJECTS=src/xenstat.o src/xen-interface.o +SONAME_FLAGS=-Wl,-soname -Wl,libxenstat.so.$(MAJOR) + +WARN_FLAGS=-Wall -Werror + +CFLAGS+=-Isrc +CFLAGS+=-I$(XEN_ROOT)/xen/include/public +CFLAGS+=-I$(LINUX_ROOT)/include/asm-xen/linux-public/ +LDFLAGS+=-Lsrc + +all: $(LIB) + +$(LIB): $(OBJECTS) + $(AR) rc $@ $^ + $(RANLIB) $@ + +$(SHLIB): $(OBJECTS) + $(CC) $(LDFLAGS) $(SONAME_FLAGS) -shared -o $@ $(OBJECTS) + +src/xenstat.o: src/xenstat.c src/xenstat.h src/xen-interface.h + $(CC) $(CFLAGS) $(WARN_FLAGS) -c -o $@ $< + +src/xen-interface.o: src/xen-interface.c src/xen-interface.h + $(CC) $(CFLAGS) $(WARN_FLAGS) -c -o $@ $< + +src/libxenstat.so.$(MAJOR): $(LIB) + 
$(MAKE_LINK) $(<F) $@ + +src/libxenstat.so: src/libxenstat.so.$(MAJOR) + $(MAKE_LINK) $(<F) $@ + +install: all +#install: all +# $(INSTALL_DATA) src/xenstat.h $(DESTDIR)$(includedir)/xenstat.h +# $(INSTALL_PROG) $(LIB) $(DESTDIR)$(libdir)/libxenstat.a +# $(INSTALL_PROG) $(SHLIB) \ +# $(DESTDIR)$(libdir)/libxenstat.so.$(MAJOR).$(MINOR) +# $(MAKE_LINK) libxenstat.so.$(MAJOR).$(MINOR) \ +# $(DESTDIR)$(libdir)/libxenstat.so.$(MAJOR) +# $(MAKE_LINK) libxenstat.so.$(MAJOR) \ +# $(DESTDIR)$(libdir)/libxenstat.so +# -$(LDCONFIG) + +PYLIB=bindings/swig/python/_xenstat.so +PYMOD=bindings/swig/python/xenstat.py +PYSRC=bindings/swig/python/_xenstat.c +PERLLIB=bindings/swig/perl/xenstat.so +PERLMOD=bindings/swig/perl/xenstat.pm +PERLSRC=bindings/swig/perl/xenstat.c +BINDINGS=$(PYLIB) $(PYMOD) $(PERLLIB) $(PERLMOD) +BINDINGSRC=$(PYSRC) $(PERLSRC) + +# The all-bindings target builds all the language bindings +all-bindings: perl-bindings python-bindings + +# The install-bindings target installs all the language bindings +install-bindings: install-perl-bindings install-python-bindings + +$(BINDINGS): $(SHLIB) $(SHLIB_LINKS) src/xenstat.h + +SWIG_FLAGS=-module xenstat -Isrc + +# Python bindings +PYTHON_VERSION=2.3 +PYTHON_FLAGS=-I/usr/include/python$(PYTHON_VERSION) -lpython$(PYTHON_VERSION) +$(PYSRC) $(PYMOD): bindings/swig/xenstat.i + swig -python $(SWIG_FLAGS) -outdir $(@D) -o $(PYSRC) $< + +$(PYLIB): $(PYSRC) + $(CC) $(CFLAGS) $(LDFLAGS) $(PYTHON_FLAGS) -shared -lxenstat -o $@ $< + +python-bindings: $(PYLIB) $(PYMOD) + +pythonlibdir=$(prefix)/lib/python$(PYTHON_VERSION)/site-packages +install-python-bindings: $(PYLIB) $(PYMOD) + $(INSTALL_PROG) $(PYLIB) $(DESTDIR)$(pythonlibdir)/_xenstat.so + $(INSTALL_PROG) $(PYMOD) $(DESTDIR)$(pythonlibdir)/xenstat.py + +ifeq ($(XENSTAT_PYTHON_BINDINGS),y) +all: python-bindings +install: install-python-bindings +endif + +# Perl bindings +PERL_FLAGS=`perl -MConfig -e 'print "$$Config{ccflags} -I$$Config{archlib}/CORE";'` +$(PERLSRC) $(PERLMOD): 
bindings/swig/xenstat.i + swig -perl $(SWIG_FLAGS) -outdir $(@D) -o $(PERLSRC) $< + +$(PERLLIB): $(PERLSRC) + $(CC) $(CFLAGS) $(LDFLAGS) $(PERL_FLAGS) -shared -lxenstat -o $@ $< + +perl-bindings: $(PERLLIB) $(PERLMOD) + +perllibdir=$(prefix)/lib/perl5 +perlmoddir=$(prefix)/share/perl5 +install-perl-bindings: $(PERLLIB) $(PERLMOD) + $(INSTALL_PROG) $(PERLLIB) $(DESTDIR)$(perllibdir)/xenstat.so + $(INSTALL_PROG) $(PERLMOD) $(DESTDIR)$(perlmoddir)/xenstat.pm + +ifeq ($(XENSTAT_PERL_BINDINGS),y) +all: perl-bindings +install: install-perl-bindings +endif + +clean: + rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS) \ + $(BINDINGS) $(BINDINGSRC) diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/bindings/swig/perl/.empty --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/libxenstat/bindings/swig/perl/.empty Tue Aug 23 19:03:21 2005 @@ -0,0 +1,1 @@ +This directory is empty; this file is included to prevent version control systems from removing the directory. diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/bindings/swig/python/.empty --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/libxenstat/bindings/swig/python/.empty Tue Aug 23 19:03:21 2005 @@ -0,0 +1,1 @@ +This directory is empty; this file is included to prevent version control systems from removing the directory. 
diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/bindings/swig/xenstat.i --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/libxenstat/bindings/swig/xenstat.i Tue Aug 23 19:03:21 2005 @@ -0,0 +1,8 @@ +%module xenstat_swig +%{ +/* Includes the header in the wrapper code */ +#include "xenstat.h" +%} + +/* Parse the header file to generate wrappers */ +%include "xenstat.h" diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/src/xen-interface.c --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/libxenstat/src/xen-interface.c Tue Aug 23 19:03:21 2005 @@ -0,0 +1,204 @@ +/* xen-interface.c + * + * Copyright (C) International Business Machines Corp., 2005 + * Authors: Josh Triplett <josht@xxxxxxxxxx> + * Judy Fischbach <jfisch@xxxxxxxxxx> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#include "xen-interface.h" +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "version.h" +#include "privcmd.h" +#include "xen.h" + +struct xi_handle { + int fd; +}; + +/* Initialize for xen-interface. Returns a handle to be used with subsequent + * calls to the xen-interface functions or NULL if an error occurs. 
*/
+xi_handle *xi_init()
+{
+    xi_handle *handle;
+
+    handle = (xi_handle *)calloc(1, sizeof(xi_handle));
+    if (handle == NULL)
+        return NULL;
+
+    handle->fd = open("/proc/xen/privcmd", O_RDWR);
+    if (handle->fd < 0) {
+        perror("Couldn't open /proc/xen/privcmd");
+        free(handle);
+        return NULL;
+    }
+
+    return handle;
+}
+
+/* Release the handle to libxc, free resources, etc. */
+void xi_uninit(xi_handle *handle)
+{
+    close (handle->fd);
+    free (handle);
+}
+
+/* Make simple xen version hypervisor calls.
+ *
+ * Issues one multicall carrying two __HYPERVISOR_xen_version requests:
+ * XENVER_version and XENVER_extraversion (with ver as its argument).
+ * Returns 0 on success, after storing the result of the XENVER_version
+ * request in *vnum.  Returns -1 on failure, in which case *vnum is left
+ * untouched. */
+static int xi_make_xen_version_hypercall(xi_handle *handle, long *vnum, xen_extraversion_t *ver)
+{
+    privcmd_hypercall_t privcmd;
+    multicall_entry_t multicall[2];
+    int ret = -1;
+
+    /* Start from zeroed entries so that no uninitialized argument slot is
+     * handed to the hypervisor */
+    memset(multicall, 0, sizeof(multicall));
+
+    /* set up for doing hypercall */
+    privcmd.op = __HYPERVISOR_multicall;
+    privcmd.arg[0] = (unsigned long)multicall;
+    privcmd.arg[1] = 2;
+
+    /* first one to get xen version number */
+    multicall[0].op = __HYPERVISOR_xen_version;
+    multicall[0].args[0] = (unsigned long)XENVER_version;
+
+    /* second to get xen version flag */
+    multicall[1].op = __HYPERVISOR_xen_version;
+    multicall[1].args[0] = (unsigned long)XENVER_extraversion;
+    multicall[1].args[1] = (unsigned long)ver;
+
+    if (mlock( &privcmd, sizeof(privcmd)) < 0) {
+        perror("Failed to mlock privcmd structure");
+        return -1;
+    }
+
+    /* Lock the whole two-entry array, not just its first element */
+    if (mlock( multicall, sizeof(multicall)) < 0) {
+        perror("Failed to mlock multicall_entry structure");
+        goto out_privcmd;
+    }
+
+    /* ver is passed to the hypervisor as the XENVER_extraversion argument,
+     * so lock it like the other buffers named in the hypercall */
+    if (mlock( ver, sizeof(*ver)) < 0) {
+        perror("Failed to mlock extraversion buffer");
+        goto out_multicall;
+    }
+
+    if (ioctl( handle->fd, IOCTL_PRIVCMD_HYPERCALL, &privcmd) < 0) {
+        perror("Hypercall failed");
+    } else {
+        /* Only read the result once the hypercall has actually run */
+        *vnum = multicall[0].result;
+        ret = 0;
+    }
+
+    /* Unlock in reverse order; each label undoes only what was locked */
+    munlock( ver, sizeof(*ver));
+out_multicall:
+    munlock( multicall, sizeof(multicall));
+out_privcmd:
+    munlock( &privcmd, sizeof(privcmd));
+
+    return ret;
+}
+
+/* Make Xen Dom0 op hypervisor call */
+static int xi_make_dom0_op(xi_handle *handle, dom0_op_t *dom_op, int dom_opcode)
+{
+    privcmd_hypercall_t privcmd;
+    int ret = 0;
+
+    /* set up for doing hypercall */
+    privcmd.op = __HYPERVISOR_dom0_op;
+    privcmd.arg[0] = (unsigned long)dom_op;
+    dom_op->cmd = dom_opcode;
+    dom_op->interface_version = DOM0_INTERFACE_VERSION;
+
+    if (mlock( &privcmd, sizeof(privcmd_hypercall_t)) < 0) {
+        perror("Failed to mlock privcmd structure");
+        return -1;
+    }
+
+    if (mlock( dom_op, sizeof(dom0_op_t)) < 0) {
+        perror("Failed to mlock dom0_op structure");
+        munlock( &privcmd, sizeof(privcmd_hypercall_t));
+        return -1;
+    }
+
+    if (ioctl( handle->fd, IOCTL_PRIVCMD_HYPERCALL, &privcmd) < 0) {
+        perror("Hypercall failed");
+        ret = -1;
+    }
+
+    munlock( &privcmd, sizeof(privcmd_hypercall_t));
+    munlock( dom_op, sizeof(dom0_op_t));
+
+    return ret;
+}
+
+/* Obtain physinfo data from dom0.  Returns 0 and fills in *physinfo on
+ * success, -1 on failure. */
+int xi_get_physinfo(xi_handle *handle, dom0_physinfo_t *physinfo)
+{
+    dom0_op_t op;
+
+    if (xi_make_dom0_op(handle, &op, DOM0_PHYSINFO) < 0) {
+        perror("DOM0_PHYSINFO Hypercall failed");
+        return -1;
+    }
+
+    *physinfo = op.u.physinfo;
+    return 0;
+}
+
+/* Obtain domain data from dom0.
+ *
+ * Issues DOM0_GETDOMAININFOLIST with first_domain and max_domains passed
+ * through unchanged and info as the result buffer; info must have room for
+ * max_domains entries.  Returns the num_domains value reported by the op, or
+ * -1 on failure. */
+int xi_get_domaininfolist(xi_handle *handle, dom0_getdomaininfo_t *info,
+                          unsigned int first_domain, unsigned int max_domains)
+{
+    dom0_op_t op;
+    size_t info_size = max_domains * sizeof(dom0_getdomaininfo_t);
+    int ret;
+
+    op.u.getdomaininfolist.first_domain = first_domain;
+    op.u.getdomaininfolist.max_domains = max_domains;
+    op.u.getdomaininfolist.buffer = info;
+
+    if (mlock( info, info_size) < 0) {
+        perror("Failed to mlock domaininfo array");
+        return -1;
+    }
+
+    if (xi_make_dom0_op(handle, &op, DOM0_GETDOMAININFOLIST) < 0) {
+        perror("DOM0_GETDOMAININFOLIST Hypercall failed");
+        ret = -1;
+    } else {
+        ret = op.u.getdomaininfolist.num_domains;
+    }
+
+    /* Drop the lock taken above on every path; previously the buffer was
+     * left locked after both success and failure */
+    munlock( info, info_size);
+
+    return ret;
+}
+
+/* Returns cpu usage data from dom0 */
+long long xi_get_vcpu_usage(xi_handle *handle, unsigned int domain,
+                            unsigned int vcpu)
+{
+    dom0_op_t op;
+    op.u.getvcpucontext.domain = domain;
+    op.u.getvcpucontext.vcpu = vcpu;
+    op.u.getvcpucontext.ctxt = NULL;
+
+    if (xi_make_dom0_op(handle, &op, DOM0_GETVCPUCONTEXT) < 0) {
+        perror("DOM0_GETVCPUCONTEXT Hypercall failed");
+        return -1;
+    }
+
+    return
op.u.getvcpucontext.cpu_time; +} + +/* gets xen version information from hypervisor */ +int xi_get_xen_version(xi_handle *handle, long *vnum, xen_extraversion_t *ver) +{ + + /* gets the XENVER_version and XENVER_extraversion */ + if (xi_make_xen_version_hypercall( handle, vnum, ver) < 0) {; + perror("XEN VERSION Hypercall failed"); + return -1; + } + + return 0; +} diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/src/xen-interface.h --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/libxenstat/src/xen-interface.h Tue Aug 23 19:03:21 2005 @@ -0,0 +1,53 @@ +/* xen-interface.h + * + * Copyright (C) International Business Machines Corp., 2005 + * Authors: Josh Triplett <josht@xxxxxxxxxx> + * Judy Fischbach <jfisch@xxxxxxxxxx> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#include <stdint.h> + +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +#include "dom0_ops.h" +#include "version.h" + +/* Opaque handles */ +typedef struct xi_handle xi_handle; + +/* Initialize for xen-interface. Returns a handle to be used with subsequent + * calls to the xen-interface functions or NULL if an error occurs. */ +xi_handle *xi_init(); + +/* Release the handle to libxc, free resources, etc. 
*/ +void xi_uninit(xi_handle *handle); + +/* Obtain xen version information from hypervisor */ +int xi_get_xen_version(xi_handle *, long *vnum, xen_extraversion_t *ver); + +/* Obtain physinfo data from dom0 */ +int xi_get_physinfo(xi_handle *, dom0_physinfo_t *); + +/* Obtain domain data from dom0 */ +int xi_get_domaininfolist(xi_handle *, dom0_getdomaininfo_t *, unsigned int, + unsigned int); + +/* Returns cpu usage data from dom0 */ +long long xi_get_vcpu_usage(xi_handle *, unsigned int, unsigned int); diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/src/xenstat.c --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/libxenstat/src/xenstat.c Tue Aug 23 19:03:21 2005 @@ -0,0 +1,640 @@ +/* libxenstat: statistics-collection library for Xen + * Copyright (C) International Business Machines Corp., 2005 + * Authors: Josh Triplett <josht@xxxxxxxxxx> + * Judy Fischbach <jfisch@xxxxxxxxxx> + * David Hendricks <dhendrix@xxxxxxxxxx> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ */ + +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <xen-interface.h> +#include "xenstat.h" +#include "version.h" + +/* + * Types + */ +struct xenstat_handle { + xi_handle *xihandle; + int page_size; + FILE *procnetdev; +}; + +#define SHORT_ASC_LEN 5 /* length of 65535 */ +#define VERSION_SIZE (2 * SHORT_ASC_LEN + 1 + sizeof(xen_extraversion_t) + 1) + +struct xenstat_node { + unsigned int flags; + unsigned long long cpu_hz; + unsigned int num_cpus; + unsigned long long tot_mem; + unsigned long long free_mem; + unsigned int num_domains; + char xen_version[VERSION_SIZE]; /* xen version running on this node */ + xenstat_domain *domains; /* Array of length num_domains */ +}; + +struct xenstat_domain { + unsigned int id; + unsigned int state; + unsigned long long cpu_ns; + unsigned int num_vcpus; + xenstat_vcpu *vcpus; /* Array of length num_vcpus */ + unsigned long long cur_mem; /* Current memory reservation */ + unsigned long long max_mem; /* Total memory allowed */ + unsigned int ssid; + unsigned int num_networks; + xenstat_network *networks; /* Array of length num_networks */ +}; + +struct xenstat_vcpu { + unsigned long long ns; +}; + +struct xenstat_network { + unsigned int id; + /* Received */ + unsigned long long rbytes; + unsigned long long rpackets; + unsigned long long rerrs; + unsigned long long rdrop; + /* Transmitted */ + unsigned long long tbytes; + unsigned long long tpackets; + unsigned long long terrs; + unsigned long long tdrop; +}; + +/* + * Data-collection types + */ +/* Called to collect the information for the node and all the domains on + * it. When called, the domain information has already been collected. */ +typedef int (*xenstat_collect_func)(xenstat_handle * handle, + xenstat_node * node); +/* Called to free the information collected by the collect function. 
The free + * function will only be called on a xenstat_node if that node includes + * information collected by the corresponding collector. */ +typedef void (*xenstat_free_func)(xenstat_node * node); +/* Called to free any information stored in the handle. Note the lack of a + * matching init function; the collect functions should initialize on first + * use. Also, the uninit function must handle the case that the collector has + * never been initialized. */ +typedef void (*xenstat_uninit_func)(xenstat_handle * handle); +typedef struct xenstat_collector { + unsigned int flag; + xenstat_collect_func collect; + xenstat_free_func free; + xenstat_uninit_func uninit; +} xenstat_collector; + +static int xenstat_collect_vcpus(xenstat_handle * handle, + xenstat_node * node); +static int xenstat_collect_networks(xenstat_handle * handle, + xenstat_node * node); +static void xenstat_free_vcpus(xenstat_node * node); +static void xenstat_free_networks(xenstat_node * node); +static void xenstat_uninit_vcpus(xenstat_handle * handle); +static void xenstat_uninit_networks(xenstat_handle * handle); + +static xenstat_collector collectors[] = { + { XENSTAT_VCPU, xenstat_collect_vcpus, + xenstat_free_vcpus, xenstat_uninit_vcpus }, + { XENSTAT_NETWORK, xenstat_collect_networks, + xenstat_free_networks, xenstat_uninit_networks } +}; + +#define NUM_COLLECTORS (sizeof(collectors)/sizeof(xenstat_collector)) + +/* + * libxenstat API + */ +xenstat_handle *xenstat_init() +{ + xenstat_handle *handle; + + handle = (xenstat_handle *) calloc(1, sizeof(xenstat_handle)); + if (handle == NULL) + return NULL; + +#if defined(PAGESIZE) + handle->page_size = PAGESIZE; +#elif defined(PAGE_SIZE) + handle->page_size = PAGE_SIZE; +#else + handle->page_size = sysconf(_SC_PAGE_SIZE); + if (handle->page_size < 0) { + perror("Failed to retrieve page size."); + free(handle); + return NULL; + } +#endif + + handle->xihandle = xi_init(); + if (handle->xihandle == NULL) { + perror("xi_init"); + free(handle); + return 
NULL;
+    }
+
+    return handle;
+}
+
+void xenstat_uninit(xenstat_handle * handle)
+{
+    unsigned int i;
+    if (handle) {
+        for (i = 0; i < NUM_COLLECTORS; i++)
+            collectors[i].uninit(handle);
+        xi_uninit(handle->xihandle);
+        free(handle);
+    }
+}
+
+/* Build a snapshot of the node.
+ *
+ * Gathers physinfo, the Xen version string and the per-domain data, then
+ * runs every collector whose flag is set in flags (XENSTAT_VCPU,
+ * XENSTAT_NETWORK).  Returns a node that the caller releases with
+ * xenstat_free_node(), or NULL if any step fails. */
+xenstat_node *xenstat_get_node(xenstat_handle * handle, unsigned int flags)
+{
+#define DOMAIN_CHUNK_SIZE 256
+    xenstat_node *node;
+    dom0_physinfo_t physinfo;
+    xen_extraversion_t version;
+    long vnum = 0;
+    dom0_getdomaininfo_t domaininfo[DOMAIN_CHUNK_SIZE];
+    unsigned int num_domains, new_domains;
+    unsigned int i;
+
+    /* Create the node */
+    node = (xenstat_node *) calloc(1, sizeof(xenstat_node));
+    if (node == NULL)
+        return NULL;
+
+    /* Get information about the physical system */
+    if (xi_get_physinfo(handle->xihandle, &physinfo) < 0) {
+        free(node);
+        return NULL;
+    }
+
+    /* Get the xen version number and xen version tag */
+    if (xi_get_xen_version(handle->xihandle, &vnum, &version) < 0) {
+        free(node);
+        return NULL;
+    }
+    snprintf(node->xen_version, VERSION_SIZE,
+             "%ld.%ld%s\n", ((vnum >> 16) & 0xFFFF), vnum & 0xFFFF, (char *)version);
+
+    node->cpu_hz = ((unsigned long long)physinfo.cpu_khz) * 1000ULL;
+    node->num_cpus =
+        (physinfo.threads_per_core * physinfo.cores_per_socket *
+         physinfo.sockets_per_node * physinfo.nr_nodes);
+    node->tot_mem = ((unsigned long long)physinfo.total_pages)
+        * handle->page_size;
+    node->free_mem = ((unsigned long long)physinfo.free_pages)
+        * handle->page_size;
+
+    /* malloc(0) is not portable, so allocate a single domain. This will
+     * be resized below. */
+    node->domains = malloc(sizeof(xenstat_domain));
+    if (node->domains == NULL) {
+        free(node);
+        return NULL;
+    }
+
+    num_domains = 0;
+    do {
+        xenstat_domain *domain;
+        int ret;
+
+        ret = xi_get_domaininfolist(handle->xihandle,
+                                    domaininfo, num_domains,
+                                    DOMAIN_CHUNK_SIZE);
+        /* A negative return is an error, not a count; catch it before
+         * it is converted to unsigned and used to size the array */
+        if (ret < 0) {
+            free(node->domains);
+            free(node);
+            return NULL;
+        }
+        new_domains = ret;
+
+        /* Grow through a temporary so the existing array is not leaked
+         * when realloc fails.  Skip the call when nothing was returned,
+         * which also avoids a zero-sized realloc. */
+        if (new_domains > 0) {
+            xenstat_domain *grown =
+                realloc(node->domains,
+                        (num_domains + new_domains)
+                        * sizeof(xenstat_domain));
+            if (grown == NULL) {
+                free(node->domains);
+                free(node);
+                return NULL;
+            }
+            node->domains = grown;
+        }
+
+        domain = node->domains + num_domains;
+
+        for (i = 0; i < new_domains; i++) {
+            /* Fill in domain using domaininfo[i] */
+            domain->id = domaininfo[i].domain;
+            domain->state = domaininfo[i].flags;
+            domain->cpu_ns = domaininfo[i].cpu_time;
+            domain->num_vcpus = domaininfo[i].n_vcpu;
+            domain->vcpus = NULL;
+            domain->cur_mem =
+                ((unsigned long long)domaininfo[i].tot_pages)
+                * handle->page_size;
+            /* Widen before multiplying, as is done for cur_mem, so the
+             * product is computed in unsigned long long rather than in
+             * the narrower type of max_pages */
+            domain->max_mem =
+                domaininfo[i].max_pages == UINT_MAX
+                ? (unsigned long long)-1
+                : ((unsigned long long)domaininfo[i].max_pages)
+                  * handle->page_size;
+            domain->ssid = domaininfo[i].ssidref;
+            domain->num_networks = 0;
+            domain->networks = NULL;
+
+            domain++;
+        }
+        num_domains += new_domains;
+    } while (new_domains == DOMAIN_CHUNK_SIZE);
+    node->num_domains = num_domains;
+
+    /* Run all the extra data collectors requested */
+    node->flags = 0;
+    for (i = 0; i < NUM_COLLECTORS; i++) {
+        if ((flags & collectors[i].flag) == collectors[i].flag) {
+            /* Record the flag first so that xenstat_free_node also
+             * releases whatever a failing collector allocated */
+            node->flags |= collectors[i].flag;
+            if(collectors[i].collect(handle, node) == 0) {
+                xenstat_free_node(node);
+                return NULL;
+            }
+        }
+    }
+
+    return node;
+}
+
+/* Release a node returned by xenstat_get_node, including the data of every
+ * collector recorded in node->flags.  Accepts NULL. */
+void xenstat_free_node(xenstat_node * node)
+{
+    unsigned int i;
+
+    if (node) {
+        if (node->domains) {
+            for (i = 0; i < NUM_COLLECTORS; i++)
+                if((node->flags & collectors[i].flag)
+                   == collectors[i].flag)
+                    collectors[i].free(node);
+            free(node->domains);
+        }
+        free(node);
+    }
+}
+
+xenstat_domain *xenstat_node_domain(xenstat_node * node, unsigned int domid)
+{
+    unsigned int i;
+
+    /* FIXME: binary search */
+    /* Find the appropriate
domain entry in the node struct. */
+    for (i = 0; i < node->num_domains; i++) {
+        if (node->domains[i].id == domid)
+            return &(node->domains[i]);
+    }
+    return NULL;
+}
+
+/* Get the domain at the given position in the node's domain array, or NULL
+ * if index is not below num_domains. */
+xenstat_domain *xenstat_node_domain_by_index(xenstat_node * node,
+                                             unsigned int index)
+{
+    /* index is unsigned, so only the upper bound needs checking */
+    if (index < node->num_domains)
+        return &(node->domains[index]);
+    return NULL;
+}
+
+const char *xenstat_node_xen_ver(xenstat_node * node)
+{
+    return node->xen_version;
+}
+
+unsigned long long xenstat_node_tot_mem(xenstat_node * node)
+{
+    return node->tot_mem;
+}
+
+unsigned long long xenstat_node_free_mem(xenstat_node * node)
+{
+    return node->free_mem;
+}
+
+unsigned int xenstat_node_num_domains(xenstat_node * node)
+{
+    return node->num_domains;
+}
+
+unsigned int xenstat_node_num_cpus(xenstat_node * node)
+{
+    return node->num_cpus;
+}
+
+/* Get information about the CPU speed */
+unsigned long long xenstat_node_cpu_hz(xenstat_node * node)
+{
+    return node->cpu_hz;
+}
+
+/* Get the domain ID for this domain */
+unsigned xenstat_domain_id(xenstat_domain * domain)
+{
+    return domain->id;
+}
+
+/* Get information about how much CPU time has been used */
+unsigned long long xenstat_domain_cpu_ns(xenstat_domain * domain)
+{
+    return domain->cpu_ns;
+}
+
+/* Find the number of VCPUs allocated to a domain */
+unsigned int xenstat_domain_num_vcpus(xenstat_domain * domain)
+{
+    return domain->num_vcpus;
+}
+
+/* Get the VCPU handle to obtain VCPU stats.
+ *
+ * Returns NULL if vcpu is not below num_vcpus, or if the domain has no VCPU
+ * array.  The array stays NULL unless the XENSTAT_VCPU collector ran, while
+ * num_vcpus is always filled in, so the count alone is not a safe guard. */
+xenstat_vcpu *xenstat_domain_vcpu(xenstat_domain * domain, unsigned int vcpu)
+{
+    if (domain->vcpus && vcpu < domain->num_vcpus)
+        return &(domain->vcpus[vcpu]);
+    return NULL;
+}
+
+/* Find the current memory reservation for this domain */
+unsigned long long xenstat_domain_cur_mem(xenstat_domain * domain)
+{
+    return domain->cur_mem;
+}
+
+/* Find the maximum memory reservation for this domain */
+unsigned long long xenstat_domain_max_mem(xenstat_domain * domain)
+{
+    return domain->max_mem;
+}
+
+/* Find the domain's SSID */
+unsigned int xenstat_domain_ssid(xenstat_domain * domain)
+{
+
return domain->ssid; +} + +/* Get domain states */ +unsigned int xenstat_domain_dying(xenstat_domain * domain) +{ + return (domain->state & DOMFLAGS_DYING) == DOMFLAGS_DYING; +} + +unsigned int xenstat_domain_crashed(xenstat_domain * domain) +{ + return ((domain->state & DOMFLAGS_SHUTDOWN) == DOMFLAGS_SHUTDOWN) + && (((domain->state >> DOMFLAGS_SHUTDOWNSHIFT) + & DOMFLAGS_SHUTDOWNMASK) == SHUTDOWN_crash); +} + +unsigned int xenstat_domain_shutdown(xenstat_domain * domain) +{ + return ((domain->state & DOMFLAGS_SHUTDOWN) == DOMFLAGS_SHUTDOWN) + && (((domain->state >> DOMFLAGS_SHUTDOWNSHIFT) + & DOMFLAGS_SHUTDOWNMASK) != SHUTDOWN_crash); +} + +unsigned int xenstat_domain_paused(xenstat_domain * domain) +{ + return (domain->state & DOMFLAGS_PAUSED) == DOMFLAGS_PAUSED; +} + +unsigned int xenstat_domain_blocked(xenstat_domain * domain) +{ + return (domain->state & DOMFLAGS_BLOCKED) == DOMFLAGS_BLOCKED; +} + +unsigned int xenstat_domain_running(xenstat_domain * domain) +{ + return (domain->state & DOMFLAGS_RUNNING) == DOMFLAGS_RUNNING; +} + +/* Get the number of networks for a given domain */ +unsigned int xenstat_domain_num_networks(xenstat_domain * domain) +{ + return domain->num_networks; +} + +/* Get the network handle to obtain network stats */ +xenstat_network *xenstat_domain_network(xenstat_domain * domain, + unsigned int network) +{ + if (domain->networks && 0 <= network && network < domain->num_networks) + return &(domain->networks[network]); + return NULL; +} + +/* + * VCPU functions + */ +/* Collect information about VCPUs */ +static int xenstat_collect_vcpus(xenstat_handle * handle, xenstat_node * node) +{ + unsigned int i, vcpu; + /* Fill in VCPU information */ + for (i = 0; i < node->num_domains; i++) { + node->domains[i].vcpus = malloc(node->domains[i].num_vcpus + * sizeof(xenstat_vcpu)); + if (node->domains[i].vcpus == NULL) + return 0; + + for (vcpu = 0; vcpu < node->domains[i].num_vcpus; vcpu++) { + /* FIXME: need to be using a more efficient 
mechanism*/
+            long long vcpu_time;
+            vcpu_time =
+                xi_get_vcpu_usage(handle->xihandle,
+                                  node->domains[i].id,
+                                  vcpu);
+            if (vcpu_time < 0)
+                return 0;
+            node->domains[i].vcpus[vcpu].ns = vcpu_time;
+        }
+    }
+    return 1;
+}
+
+/* Free VCPU information */
+static void xenstat_free_vcpus(xenstat_node * node)
+{
+    unsigned int i;
+    for (i = 0; i < node->num_domains; i++)
+        free(node->domains[i].vcpus);
+}
+
+/* Free VCPU information in handle - nothing to do */
+static void xenstat_uninit_vcpus(xenstat_handle * handle)
+{
+}
+
+/* Get VCPU usage */
+unsigned long long xenstat_vcpu_ns(xenstat_vcpu * vcpu)
+{
+    return vcpu->ns;
+}
+
+/*
+ * Network functions
+ */
+
+/* Expected format of /proc/net/dev */
+static const char PROCNETDEV_HEADER[] =
+    "Inter-| Receive |"
+    " Transmit\n"
+    " face |bytes packets errs drop fifo frame compressed multicast|"
+    "bytes packets errs drop fifo colls carrier compressed\n";
+
+/* Collect information about networks.
+ *
+ * Parses the vif<domid>.<id> lines of /proc/net/dev and appends one
+ * xenstat_network per interface to the matching domain of node.  The stream
+ * is opened and its header validated on first use, then kept in the handle.
+ * Returns 1 on success and 0 on failure, the convention xenstat_get_node
+ * checks and xenstat_collect_vcpus follows.
+ *
+ * NOTE(review): the Receive columns are stored in the t* fields and the
+ * Transmit columns in the r* fields - presumably because the vif is seen
+ * from the other end of the link; confirm before relying on it. */
+static int xenstat_collect_networks(xenstat_handle * handle,
+                                    xenstat_node * node)
+{
+    /* Open and validate /proc/net/dev if we haven't already */
+    if (handle->procnetdev == NULL) {
+        char header[sizeof(PROCNETDEV_HEADER)];
+        int valid = 0;
+
+        handle->procnetdev = fopen("/proc/net/dev", "r");
+        if (handle->procnetdev == NULL) {
+            perror("Error opening /proc/net/dev");
+            return 0;
+        }
+
+        /* Validate the format of /proc/net/dev */
+        if (fread(header, sizeof(PROCNETDEV_HEADER) - 1, 1,
+                  handle->procnetdev) != 1) {
+            perror("Error reading /proc/net/dev header");
+        } else {
+            header[sizeof(PROCNETDEV_HEADER) - 1] = '\0';
+            if (strcmp(header, PROCNETDEV_HEADER) == 0)
+                valid = 1;
+            else
+                fprintf(stderr,
+                        "Unexpected /proc/net/dev format\n");
+        }
+
+        /* Do not keep a stream that failed validation: a non-NULL
+         * procnetdev would make the next call skip the check */
+        if (!valid) {
+            fclose(handle->procnetdev);
+            handle->procnetdev = NULL;
+            return 0;
+        }
+    }
+
+    /* Fill in networks */
+    /* FIXME: optimize this */
+    fseek(handle->procnetdev, sizeof(PROCNETDEV_HEADER) - 1, SEEK_SET);
+    while (1) {
+        xenstat_domain *domain;
+        xenstat_network *grown;
+        xenstat_network net;
+        unsigned int domid;
+        int ret = fscanf(handle->procnetdev,
+                         "vif%u.%u:%llu%llu%llu%llu%*u%*u%*u%*u"
+                         "%llu%llu%llu%llu%*u%*u%*u%*u\n",
+                         &domid, &net.id,
+                         &net.tbytes, &net.tpackets, &net.terrs,
+                         &net.tdrop,
+                         &net.rbytes, &net.rpackets, &net.rerrs,
+                         &net.rdrop);
+        if (ret == EOF)
+            break;
+        if (ret != 10) {
+            /* Not a vif line: discard the rest of it.  fgetc returns
+             * an int, which is what EOF must be compared against. */
+            int c;
+            do {
+                c = fgetc(handle->procnetdev);
+            } while (c != '\n' && c != EOF);
+            if (c == EOF)
+                break;
+            continue;
+        }
+
+        /* FIXME: this does a search for the domid */
+        domain = xenstat_node_domain(node, domid);
+        if (domain == NULL) {
+            fprintf(stderr,
+                    "Found interface vif%u.%u but domain %u"
+                    " does not exist.\n", domid, net.id,
+                    domid);
+            continue;
+        }
+
+        /* Grow the array by one through a temporary, and only count the
+         * new entry once it exists, so a failed allocation neither
+         * leaks the old array nor leaves num_networks too large.
+         * realloc(NULL, n) behaves as malloc(n) for the first entry. */
+        if (domain->networks == NULL)
+            domain->num_networks = 0;
+        grown = realloc(domain->networks,
+                        (domain->num_networks + 1) *
+                        sizeof(xenstat_network));
+        if (grown == NULL)
+            return 0;
+        domain->networks = grown;
+        domain->networks[domain->num_networks++] = net;
+    }
+
+    return 1;
+}
+
+/* Free network information */
+static void xenstat_free_networks(xenstat_node * node)
+{
+    unsigned int i;
+    for (i = 0; i < node->num_domains; i++)
+        free(node->domains[i].networks);
+}
+
+/* Free network information in handle */
+static void xenstat_uninit_networks(xenstat_handle * handle)
+{
+    if(handle->procnetdev)
+        fclose(handle->procnetdev);
+}
+
+/* Get the network ID */
+unsigned int xenstat_network_id(xenstat_network * network)
+{
+    return network->id;
+}
+
+/* Get the number of receive bytes */
+unsigned long long xenstat_network_rbytes(xenstat_network * network)
+{
+    return network->rbytes;
+}
+
+/* Get the number of receive packets */
+unsigned long long xenstat_network_rpackets(xenstat_network * network)
+{
+    return network->rpackets;
+}
+
+/* Get the number of receive errors */
+unsigned long long xenstat_network_rerrs(xenstat_network * network)
+{
+    return network->rerrs;
+}
+
+/* Get the number of receive drops */
+unsigned long long xenstat_network_rdrop(xenstat_network * network)
+{
+    return
network->rdrop; +} + +/* Get the number of transmit bytes */ +unsigned long long xenstat_network_tbytes(xenstat_network * network) +{ + return network->tbytes; +} + +/* Get the number of transmit packets */ +unsigned long long xenstat_network_tpackets(xenstat_network * network) +{ + return network->tpackets; +} + +/* Get the number of transmit errors */ +unsigned long long xenstat_network_terrs(xenstat_network * network) +{ + return network->terrs; +} + +/* Get the number of transmit dropped packets */ +unsigned long long xenstat_network_tdrop(xenstat_network * network) +{ + return network->tdrop; +} diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/libxenstat/src/xenstat.h --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/libxenstat/src/xenstat.h Tue Aug 23 19:03:21 2005 @@ -0,0 +1,150 @@ +/* libxenstat: statistics-collection library for Xen + * Copyright (C) International Business Machines Corp., 2005 + * Authors: Josh Triplett <josht@xxxxxxxxxx> + * Judy Fischbach <jfisch@xxxxxxxxxx> + * David Hendricks <dhendrix@xxxxxxxxxx> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +/* libxenstat API */ + +/* Opaque handles */ +typedef struct xenstat_handle xenstat_handle; +typedef struct xenstat_domain xenstat_domain; +typedef struct xenstat_node xenstat_node; +typedef struct xenstat_vcpu xenstat_vcpu; +typedef struct xenstat_network xenstat_network; + +/* Initialize the xenstat library. 
Returns a handle to be used with + * subsequent calls to the xenstat library, or NULL if an error occurs. */ +xenstat_handle *xenstat_init(); + +/* Release the handle to libxc, free resources, etc. */ +void xenstat_uninit(xenstat_handle * handle); + +/* Get all available information about a node */ +#define XENSTAT_VCPU 0x1 +#define XENSTAT_NETWORK 0x2 +#define XENSTAT_ALL (XENSTAT_VCPU|XENSTAT_NETWORK) +xenstat_node *xenstat_get_node(xenstat_handle * handle, unsigned int flags); + +/* Free the information */ +void xenstat_free_node(xenstat_node * node); + +/* + * Node functions - extract information from a xenstat_node + */ + +/* Get information about the domain with the given domain ID */ +xenstat_domain *xenstat_node_domain(xenstat_node * node, + unsigned int domid); + +/* Get the domain with the given index; used to loop over all domains. */ +xenstat_domain *xenstat_node_domain_by_index(xenstat_node * node, + unsigned index); +/* Get xen version of the node */ +const char *xenstat_node_xen_ver(xenstat_node * node); + +/* Get amount of total memory on a node */ +unsigned long long xenstat_node_tot_mem(xenstat_node * node); + +/* Get amount of free memory on a node */ +unsigned long long xenstat_node_free_mem(xenstat_node * node); + +/* Find the number of domains existing on a node */ +unsigned int xenstat_node_num_domains(xenstat_node * node); + +/* Find the number of CPUs existing on a node */ +unsigned int xenstat_node_num_cpus(xenstat_node * node); + +/* Get information about the CPU speed */ +unsigned long long xenstat_node_cpu_hz(xenstat_node * node); + +/* + * Domain functions - extract information from a xenstat_domain + */ + +/* Get the domain ID for this domain */ +unsigned xenstat_domain_id(xenstat_domain * domain); + +/* Get information about how much CPU time has been used */ +unsigned long long xenstat_domain_cpu_ns(xenstat_domain * domain); + +/* Find the number of VCPUs allocated to a domain */ +unsigned int xenstat_domain_num_vcpus(xenstat_domain 
* domain); + +/* Get the VCPU handle to obtain VCPU stats */ +xenstat_vcpu *xenstat_domain_vcpu(xenstat_domain * domain, + unsigned int vcpu); + +/* Find the current memory reservation for this domain */ +unsigned long long xenstat_domain_cur_mem(xenstat_domain * domain); + +/* Find the maximum memory reservation for this domain */ +unsigned long long xenstat_domain_max_mem(xenstat_domain * domain); + +/* Find the domain's SSID */ +unsigned int xenstat_domain_ssid(xenstat_domain * domain); + +/* Get domain states */ +unsigned int xenstat_domain_dying(xenstat_domain * domain); +unsigned int xenstat_domain_crashed(xenstat_domain * domain); +unsigned int xenstat_domain_shutdown(xenstat_domain * domain); +unsigned int xenstat_domain_paused(xenstat_domain * domain); +unsigned int xenstat_domain_blocked(xenstat_domain * domain); +unsigned int xenstat_domain_running(xenstat_domain * domain); + +/* Get the number of networks for a given domain */ +unsigned int xenstat_domain_num_networks(xenstat_domain *); + +/* Get the network handle to obtain network stats */ +xenstat_network *xenstat_domain_network(xenstat_domain * domain, + unsigned int network); + +/* + * VCPU functions - extract information from a xenstat_vcpu + */ + +/* Get VCPU usage */ +unsigned long long xenstat_vcpu_ns(xenstat_vcpu * vcpu); + + +/* + * Network functions - extract information from a xenstat_network + */ + +/* Get the ID for this network */ +unsigned int xenstat_network_id(xenstat_network * network); + +/* Get the number of receive bytes for this network */ +unsigned long long xenstat_network_rbytes(xenstat_network * network); + +/* Get the number of receive packets for this network */ +unsigned long long xenstat_network_rpackets(xenstat_network * network); + +/* Get the number of receive errors for this network */ +unsigned long long xenstat_network_rerrs(xenstat_network * network); + +/* Get the number of receive drops for this network */ +unsigned long long xenstat_network_rdrop(xenstat_network 
* network); + +/* Get the number of transmit bytes for this network */ +unsigned long long xenstat_network_tbytes(xenstat_network * network); + +/* Get the number of transmit packets for this network */ +unsigned long long xenstat_network_tpackets(xenstat_network * network); + +/* Get the number of transmit errors for this network */ +unsigned long long xenstat_network_terrs(xenstat_network * network); + +/* Get the number of transmit drops for this network */ +unsigned long long xenstat_network_tdrop(xenstat_network * network); diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/xentop/Makefile --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/xentop/Makefile Tue Aug 23 19:03:21 2005 @@ -0,0 +1,44 @@ +# Copyright (C) International Business Machines Corp., 2005 +# Author: Josh Triplett <josht@xxxxxxxxxx> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; under version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +XEN_ROOT=../../.. 
+include $(XEN_ROOT)/tools/Rules.mk + +ifneq ($(XENSTAT_XENTOP),y) +all install xentop: +else + +INSTALL = install +INSTALL_PROG = $(INSTALL) -m0755 -D +INSTALL_DATA = $(INSTALL) -m0644 -D + +prefix=/usr +mandir=$(prefix)/share/man +man1dir=$(mandir)/man1 +sbindir=$(prefix)/sbin + +CFLAGS += -DGCC_PRINTF -Wall -Werror -I$(XEN_LIBXENSTAT) +LDFLAGS += -L$(XEN_LIBXENSTAT) +LDLIBS += -lxenstat -lncurses + +all: xentop + +xentop: xentop.o + +install: xentop xentop.1 + $(INSTALL_PROG) xentop $(DESTDIR)$(sbindir)/xentop + $(INSTALL_DATA) xentop.1 $(DESTDIR)$(man1dir)/xentop.1 + +endif + +clean: + rm -f xentop xentop.o diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/xentop/TODO --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/xentop/TODO Tue Aug 23 19:03:21 2005 @@ -0,0 +1,34 @@ +Display error messages on the help line after bad input at a prompt. +Fractional delay times +Use prompting to search for domains +Better line editing? + +* Make CPU in % more accurate +* Domain total network TX % and RX % + +Like Top, f feature, field select of domain columns, toggle the display of +field by typing the letter associated with field, if displayed it shows in +bold and the letter is Capitalized along with a leading asterisk for the +field, if not selected for display letter is lowercase, no leading asterisk +and field is not bolded. + +Like Top, ordering of domain columns, o feature Capital letter shifts left, +lowercase letter shifts right? + +Color +Full management: pause, destroy, create domains + +Add support for Virtual Block Devices (vbd) + +To think about: +Support for more than one node display (distributed monitoring +from any node of all other nodes in a cluster) +Bottom line option (Switch node, Search node [tab completion?]) + +Capture/Logging of resource information generated during a time interval. 
+-b batch mode dump snapshots to standard output (used with -n) +-n number of iterations to dump to standard output (unlimited if not specified) +-d monitor DomIDs as -dD1,-dD2 or -dD1,D2... + Monitor only domains with specified domain IDs +-m monitor nodeIDs as -mN1,-mN2 or -mN1,N2... + Monitor only domains with specified node IDs diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/xentop/xentop.1 --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/xentop/xentop.1 Tue Aug 23 19:03:21 2005 @@ -0,0 +1,88 @@ +.\" Copyright (C) International Business Machines Corp., 2005 +.\" Author: Josh Triplett <josht@xxxxxxxxxx> +.\" +.\" This program is free software; you can redistribute it and/or modify +.\" it under the terms of the GNU General Public License as published by +.\" the Free Software Foundation; under version 2 of the License. +.\" +.\" This program is distributed in the hope that it will be useful, +.\" but WITHOUT ANY WARRANTY; without even the implied warranty of +.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +.\" GNU General Public License for more details. +.\" +.\" You should have received a copy of the GNU General Public License +.\" along with this program; if not, write to the Free Software +.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +.TH xentop 1 "August 2005" +.SH NAME +\fBxentop\fR \- displays real-time information about a Xen system and domains + +.SH SYNOPSIS +.B xentop +[\fB\-h\fR] +[\fB\-V\fR] +[\fB\-d\fRSECONDS] +[\fB\-n\fR] +[\fB\-r\fR] +[\fB\-v\fR] + +.SH DESCRIPTION +\fBxentop\fR displays information about the Xen system and domains, in a +continually-updating manner. Command-line options and interactive commands +can change the detail and format of the information displayed by \fBxentop\fR. 
+ +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-d\fR, \fB\-\-delay\fR=\fISECONDS\fR +seconds between updates (default 3) +.TP +\fB\-n\fR, \fB\-\-networks\fR +output network information +.TP +\fB\-r\fR, \fB\-\-repeat\-header\fR +repeat table header before each domain +.TP +\fB\-v\fR, \fB\-\-vcpus\fR +output VCPU data + +.SH "INTERACTIVE COMMANDS" +All interactive commands are case-insensitive. +.TP +.B D +set delay between updates +.TP +.B N +toggle display of network information +.TP +.B Q, Esc +quit +.TP +.B R +toggle table header before each domain +.TP +.B S +cycle sort order +.TP +.B V +toggle display of VCPU information +.TP +.B Arrows +scroll domain display + +.SH AUTHORS +Written by Judy Fischbach, David Hendricks, and Josh Triplett + +.SH "REPORTING BUGS" +Report bugs to <dsteklof@xxxxxxxxxx>. + +.SH COPYRIGHT +Copyright \(co 2005 International Business Machines Corp +.br +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. diff -r cd984b3478f6 -r cc5f88b719d0 tools/xenstat/xentop/xentop.c --- /dev/null Mon Aug 22 18:37:48 2005 +++ b/tools/xenstat/xentop/xentop.c Tue Aug 23 19:03:21 2005 @@ -0,0 +1,876 @@ +/* + * Copyright (C) International Business Machines Corp., 2005 + * Author(s): Judy Fischbach <jfisch@xxxxxxxxxx> + * David Hendricks <dhendrix@xxxxxxxxxx> + * Josh Triplett <josht@xxxxxxxxxx> + * based on code from Anthony Liguori <aliguori@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <curses.h> +#include <ctype.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <time.h> +#include <unistd.h> + +#include <xenstat.h> + +#define XENTOP_VERSION "1.0" + +#define XENTOP_DISCLAIMER \ +"Copyright (C) 2005 International Business Machines Corp\n"\ +"This is free software; see the source for copying conditions.There is NO\n"\ +"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" +#define XENTOP_BUGSTO "Report bugs to <dsteklof@xxxxxxxxxx>.\n" + +#define _GNU_SOURCE +#include <getopt.h> + +#if !defined(__GNUC__) && !defined(__GNUG__) +#define __attribute__(arg) /* empty */ +#endif + +#define KEY_ESCAPE '\x1B' + +/* + * Function prototypes + */ +/* Utility functions */ +static void usage(const char *); +static void version(void); +static void cleanup(void); +static void fail(const char *); +static int current_row(void); +static int lines(void); +static void print(const char *, ...) 
__attribute__((format(printf,1,2))); +static void attr_addstr(int attr, const char *str); +static void set_delay(char *value); +static void set_prompt(char *new_prompt, void (*func)(char *)); +static int handle_key(int); +static int compare(unsigned long long, unsigned long long); +static int compare_domains(xenstat_domain **, xenstat_domain **); +static unsigned long long tot_net_bytes( xenstat_domain *, int); + +/* Field functions */ +static int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_domid(xenstat_domain *domain); +static int compare_state(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_state(xenstat_domain *domain); +static int compare_cpu(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_cpu(xenstat_domain *domain); +static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_cpu_pct(xenstat_domain *domain); +static int compare_mem(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_mem(xenstat_domain *domain); +static void print_mem_pct(xenstat_domain *domain); +static int compare_maxmem(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_maxmem(xenstat_domain *domain); +static void print_max_pct(xenstat_domain *domain); +static int compare_vcpus(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_vcpus(xenstat_domain *domain); +static int compare_nets(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_nets(xenstat_domain *domain); +static int compare_net_tx(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_net_tx(xenstat_domain *domain); +static int compare_net_rx(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_net_rx(xenstat_domain *domain); +static int compare_ssid(xenstat_domain *domain1, xenstat_domain *domain2); +static void print_ssid(xenstat_domain *domain); + +/* Section printing functions */ +static void 
do_summary(void); +static void do_header(void); +static void do_bottom_line(void); +static void do_domain(xenstat_domain *); +static void do_vcpu(xenstat_domain *); +static void do_network(xenstat_domain *); +static void top(void); + +/* Field types */ +typedef enum field_id { + FIELD_DOMID, + FIELD_STATE, + FIELD_CPU, + FIELD_CPU_PCT, + FIELD_MEM, + FIELD_MEM_PCT, + FIELD_MAXMEM, + FIELD_MAX_PCT, + FIELD_VCPUS, + FIELD_NETS, + FIELD_NET_TX, + FIELD_NET_RX, + FIELD_SSID +} field_id; + +typedef struct field { + field_id num; + const char *header; + unsigned int default_width; + int (*compare)(xenstat_domain *domain1, xenstat_domain *domain2); + void (*print)(xenstat_domain *domain); +} field; + +field fields[] = { + { FIELD_DOMID, "DOMID", 5, compare_domid, print_domid }, + { FIELD_STATE, "STATE", 6, compare_state, print_state }, + { FIELD_CPU, "CPU(sec)", 10, compare_cpu, print_cpu }, + { FIELD_CPU_PCT, "CPU(%)", 6, compare_cpu_pct, print_cpu_pct }, + { FIELD_MEM, "MEM(k)", 10, compare_mem, print_mem }, + { FIELD_MEM_PCT, "MEM(%)", 6, compare_mem, print_mem_pct }, + { FIELD_MAXMEM, "MAXMEM(k)", 10, compare_maxmem, print_maxmem }, + { FIELD_MAX_PCT, "MAXMEM(%)", 9, compare_maxmem, print_max_pct }, + { FIELD_VCPUS, "VCPUS", 5, compare_vcpus, print_vcpus }, + { FIELD_NETS, "NETS", 4, compare_nets, print_nets }, + { FIELD_NET_TX, "NETTX(k)", 8, compare_net_tx, print_net_tx }, + { FIELD_NET_RX, "NETRX(k)", 8, compare_net_rx, print_net_rx }, + { FIELD_SSID, "SSID", 4, compare_ssid, print_ssid } +}; + +const unsigned int NUM_FIELDS = sizeof(fields)/sizeof(field); + +/* Globals */ +struct timeval curtime, oldtime; +xenstat_handle *xhandle = NULL; +xenstat_node *prev_node = NULL; +xenstat_node *cur_node = NULL; +field_id sort_field = FIELD_DOMID; +unsigned int first_domain_index = 0; +unsigned int delay = 3; +int show_vcpus = 0; +int show_networks = 0; +int repeat_header = 0; +#define PROMPT_VAL_LEN 80 +char *prompt = NULL; +char prompt_val[PROMPT_VAL_LEN]; +int 
prompt_val_len = 0;
+void (*prompt_complete_func)(char *);
+
+/*
+ * Function definitions
+ */
+
+/* Utility functions */
+
+/* Print usage message, using given program name */
+static void usage(const char *program)
+{
+	printf("Usage: %s [OPTION]\n"
+	       "Displays ongoing information about xen vm resources \n\n"
+	       "-h, --help display this help and exit\n"
+	       "-V, --version output version information and exit\n"
+	       "-d, --delay=SECONDS seconds between updates (default 3)\n"
+	       "-n, --networks output vif network data\n"
+	       "-r, --repeat-header repeat table header before each domain\n"
+	       "-v, --vcpus output vcpu data\n"
+	       "\n" XENTOP_BUGSTO,
+	       program);
+	return;
+}
+
+/* Print program version information */
+static void version(void)
+{
+	printf("xentop " XENTOP_VERSION "\n"
+	       "Written by Judy Fischbach, David Hendricks, Josh Triplett\n"
+	       "\n" XENTOP_DISCLAIMER);
+}
+
+/* Clean up any open resources */
+static void cleanup(void)
+{
+	if(!isendwin())
+		endwin();
+	if(prev_node != NULL)
+		xenstat_free_node(prev_node);
+	if(cur_node != NULL)
+		xenstat_free_node(cur_node);
+	if(xhandle != NULL)
+		xenstat_uninit(xhandle);
+}
+
+/* Display the given message and gracefully exit.
+ *
+ * Leaves curses mode first (if still active) so the message is visible on
+ * the normal terminal, then exits with status 1.  str is written verbatim:
+ * it is never used as a printf format, so a '%' in the message cannot be
+ * misread as a conversion specification. */
+static void fail(const char *str)
+{
+	if(!isendwin())
+		endwin();
+	fputs(str, stderr);
+	exit(1);
+}
+
+/* Return the row containing the cursor. */
+static int current_row(void)
+{
+	int y, x;
+	getyx(stdscr, y, x);
+	return y;
+}
+
+/* Return the number of lines on the screen. */
+static int lines(void)
+{
+	int y, x;
+	getmaxyx(stdscr, y, x);
+	return y;
+}
+
+/* printf-style print function which calls printw, but only if the cursor is
+ * not on the last line. */
+static void print(const char *fmt, ...)
+{
+	va_list args;
+
+	if(current_row() < lines()-1) {
+		va_start(args, fmt);
+		vw_printw(stdscr, fmt, args);
+		va_end(args);
+	}
+}
+
+/* Print a string with the given attributes set. 
*/
+static void attr_addstr(int attr, const char *str)
+{
+	attron(attr);
+	addstr(str);
+	attroff(attr);
+}
+
+/* Handle setting the delay from the user-supplied value in prompt_val */
+static void set_delay(char *value)
+{
+	int new_delay;
+	new_delay = atoi(value);
+	if(new_delay > 0)
+		delay = new_delay;
+}
+
+/* Enable prompting mode with the given prompt string; call the given function
+ * when a value is available. */
+static void set_prompt(char *new_prompt, void (*func)(char *))
+{
+	prompt = new_prompt;
+	prompt_val[0] = '\0';
+	prompt_val_len = 0;
+	prompt_complete_func = func;
+}
+
+/* Handle user input, return 0 if the program should quit, or 1 if not.
+ *
+ * With no prompt active, ch is treated as an interactive command (toggle
+ * networks/header/VCPUs, cycle the sort field, scroll, start the delay
+ * prompt, quit).  With a prompt active, ch edits prompt_val: '\r' hands the
+ * value to prompt_complete_func and leaves prompting mode, Escape cancels,
+ * Backspace deletes the last character, and any other printable character
+ * is appended while there is room. */
+static int handle_key(int ch)
+{
+	if(prompt == NULL) {
+		/* Not prompting for input; handle interactive commands */
+		switch(ch) {
+		case 'n': case 'N':
+			show_networks ^= 1;
+			break;
+		case 'r': case 'R':
+			repeat_header ^= 1;
+			break;
+		case 's': case 'S':
+			sort_field = (sort_field + 1) % NUM_FIELDS;
+			break;
+		case 'v': case 'V':
+			show_vcpus ^= 1;
+			break;
+		case KEY_DOWN:
+			first_domain_index++;
+			break;
+		case KEY_UP:
+			if(first_domain_index > 0)
+				first_domain_index--;
+			break;
+		case 'd': case 'D':
+			set_prompt("Delay(sec)", set_delay);
+			break;
+		case 'q': case 'Q': case KEY_ESCAPE:
+			return 0;
+		}
+	} else {
+		/* Prompting for input; handle line editing */
+		switch(ch) {
+		case '\r':
+			prompt_complete_func(prompt_val);
+			set_prompt(NULL, NULL);
+			break;
+		case KEY_ESCAPE:
+			set_prompt(NULL, NULL);
+			break;
+		case KEY_BACKSPACE:
+			if(prompt_val_len > 0)
+				prompt_val[--prompt_val_len] = '\0';
+			/* Must not fall through: the key code itself is
+			 * not input to be appended. */
+			break;
+		default:
+			/* isprint() is only defined for values that fit in
+			 * an unsigned char (or EOF); other key codes passed
+			 * in ch can be larger, so range-check before
+			 * calling it. */
+			if((prompt_val_len+1) < PROMPT_VAL_LEN
+			   && ch >= 0 && ch <= 0xFF
+			   && isprint(ch)) {
+				prompt_val[prompt_val_len++] = (char)ch;
+				prompt_val[prompt_val_len] = '\0';
+			}
+			break;
+		}
+	}
+
+	return 1;
+}
+
+/* Compares two integers, returning -1,0,1 for <,=,> */
+static int compare(unsigned long long i1, unsigned long long i2)
+{
+	if(i1 < i2)
+		return -1;
+	if(i1 > i2)
+		return 1;
+	return 0;
+}
+
+/* Comparison function for 
use with qsort. Compares two domains using the + * current sort field. */ +static int compare_domains(xenstat_domain **domain1, xenstat_domain **domain2) +{ + return fields[sort_field].compare(*domain1, *domain2); +} + +/* Field functions */ + +/* Compares domain ids of two domains, returning -1,0,1 for <,=,> */ +int compare_domid(xenstat_domain *domain1, xenstat_domain *domain2) +{ + return compare(xenstat_domain_id(domain1), xenstat_domain_id(domain2)); +} + +/* Prints domain identification number */ +void print_domid(xenstat_domain *domain) +{ + print("%5u", xenstat_domain_id(domain)); +} + +struct { + unsigned int (*get)(xenstat_domain *); + char ch; +} state_funcs[] = { + { xenstat_domain_dying, 'd' }, + { xenstat_domain_shutdown, 's' }, + { xenstat_domain_blocked, 'b' }, + { xenstat_domain_crashed, 'c' }, + { xenstat_domain_paused, 'p' }, + { xenstat_domain_running, 'r' } +}; +const unsigned int NUM_STATES = sizeof(state_funcs)/sizeof(*state_funcs); + +/* Compare states of two domains, returning -1,0,1 for <,=,> */ +static int compare_state(xenstat_domain *domain1, xenstat_domain *domain2) +{ + unsigned int i, d1s, d2s; + for(i = 0; i < NUM_STATES; i++) { + d1s = state_funcs[i].get(domain1); + d2s = state_funcs[i].get(domain2); + if(d1s && !d2s) + return -1; + if(d2s && !d1s) + return 1; + } + return 0; +} + +/* Prints domain state in abbreviated letter format */ +static void print_state(xenstat_domain *domain) +{ + unsigned int i; + for(i = 0; i < NUM_STATES; i++) + print("%c", state_funcs[i].get(domain) ? 
state_funcs[i].ch
+						         : '-');
+}
+
+/* Compares cpu usage of two domains, returning -1,0,1 for >,=,< (the
+ * result of compare() is negated, so the larger value sorts first) */
+static int compare_cpu(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	return -compare(xenstat_domain_cpu_ns(domain1),
+			xenstat_domain_cpu_ns(domain2));
+}
+
+/* Prints domain cpu usage in seconds */
+static void print_cpu(xenstat_domain *domain)
+{
+	print("%10llu", xenstat_domain_cpu_ns(domain)/1000000000);
+}
+
+/* Computes the CPU percentage used for a specified domain, from the CPU
+ * time consumed between the previous sample (prev_node, taken at oldtime)
+ * and the current one (taken at curtime).  Returns 0.0 when there is no
+ * previous sample, when the domain was not in it, or when no time has
+ * elapsed between the two samples. */
+static double get_cpu_pct(xenstat_domain *domain)
+{
+	xenstat_domain *old_domain;
+	double us_elapsed;
+
+	/* Can't calculate CPU percentage without a previous sample. */
+	if(prev_node == NULL)
+		return 0.0;
+
+	old_domain = xenstat_node_domain(prev_node, xenstat_domain_id(domain));
+	if(old_domain == NULL)
+		return 0.0;
+
+	/* Calculate the time elapsed in microseconds */
+	us_elapsed = ((curtime.tv_sec-oldtime.tv_sec)*1000000.0
+		      +(curtime.tv_usec - oldtime.tv_usec));
+
+	/* Avoid dividing by zero (or by a negative interval if the clock
+	 * was stepped backwards). */
+	if(us_elapsed <= 0.0)
+		return 0.0;
+
+	/* In the following, nanoseconds must be divided by 1000.0 to
+	 * convert to microseconds, then multiplied by 100.0 to get a
+	 * percentage, resulting in a division by 10.0 */
+	return ((xenstat_domain_cpu_ns(domain)
+		 -xenstat_domain_cpu_ns(old_domain))/10.0)/us_elapsed;
+}
+
+/* Compares cpu percentage of two domains, returning -1,0,1 for >,=,<.
+ * The values are compared as doubles: compare() takes unsigned long long
+ * arguments, which would truncate the fractional part and make e.g. 0.2%
+ * and 0.9% compare equal. */
+static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	double pct1 = get_cpu_pct(domain1);
+	double pct2 = get_cpu_pct(domain2);
+
+	if(pct1 > pct2)
+		return -1;
+	if(pct1 < pct2)
+		return 1;
+	return 0;
+}
+
+/* Prints cpu percentage statistic */
+static void print_cpu_pct(xenstat_domain *domain)
+{
+	print("%6.1f", get_cpu_pct(domain));
+}
+
+/* Compares current memory of two domains, returning -1,0,1 for >,=,< (the
+ * result of compare() is negated, so the larger value sorts first) */
+static int compare_mem(xenstat_domain *domain1, xenstat_domain *domain2)
+{
+	return -compare(xenstat_domain_cur_mem(domain1),
+			xenstat_domain_cur_mem(domain2));
+}
+
+/* Prints current memory statistic */
+static void print_mem(xenstat_domain *domain)
+{
+	print("%10llu", xenstat_domain_cur_mem(domain)/1024);
+}
+
+/* Prints memory percentage statistic, ratio of current domain 
memory to total + * node memory */ +static void print_mem_pct(xenstat_domain *domain) +{ + print("%6.1f", (double)xenstat_domain_cur_mem(domain) / + (double)xenstat_node_tot_mem(cur_node) * 100); +} + +/* Compares maximum memory of two domains, returning -1,0,1 for >,=,< (the + * result of compare() is negated) */ +static int compare_maxmem(xenstat_domain *domain1, xenstat_domain *domain2) +{ + return -compare(xenstat_domain_max_mem(domain1), + xenstat_domain_max_mem(domain2)); +} + +/* Prints maximum domain memory statistic in KB, or "no limit" when the + * maximum is (unsigned long long)-1 */ +static void print_maxmem(xenstat_domain *domain) +{ + unsigned long long max_mem = xenstat_domain_max_mem(domain); + if(max_mem == ((unsigned long long)-1)) + print("%10s", "no limit"); + else + print("%10llu", max_mem/1024); +} + +/* Prints maximum memory percentage statistic, ratio of maximum domain memory + * to total node memory, or "n/a" when the maximum is (unsigned long long)-1 */ +static void print_max_pct(xenstat_domain *domain) +{ + if (xenstat_domain_max_mem(domain) == (unsigned long long)-1) + print("%9s", "n/a"); + else + print("%9.1f", (double)xenstat_domain_max_mem(domain) / + (double)xenstat_node_tot_mem(cur_node) * 100); +} + +/* Compares number of virtual CPUs of two domains, returning -1,0,1 for + * >,=,< (the result of compare() is negated) */ +static int compare_vcpus(xenstat_domain *domain1, xenstat_domain *domain2) +{ + return -compare(xenstat_domain_num_vcpus(domain1), + xenstat_domain_num_vcpus(domain2)); +} + +/* Prints number of virtual CPUs statistic */ +static void print_vcpus(xenstat_domain *domain) +{ + print("%5u", xenstat_domain_num_vcpus(domain)); +} + +/* Compares number of virtual networks of two domains, returning -1,0,1 for + * >,=,< (the result of compare() is negated) */ +static int compare_nets(xenstat_domain *domain1, xenstat_domain *domain2) +{ + return -compare(xenstat_domain_num_networks(domain1), + xenstat_domain_num_networks(domain2)); +} + +/* Prints number of virtual networks statistic */ +static void print_nets(xenstat_domain *domain) +{ + print("%4u", xenstat_domain_num_networks(domain)); +} + +/* Compares number of total network tx bytes of two domains, 
returning -1,0,1 for + * <,=,> */ +static int compare_net_tx(xenstat_domain *domain1, xenstat_domain *domain2) +{ + return -compare(tot_net_bytes(domain1, FALSE), + tot_net_bytes(domain2, FALSE)); +} + +/* Prints number of total network tx bytes statistic */ +static void print_net_tx(xenstat_domain *domain) +{ + print("%8llu", tot_net_bytes(domain, FALSE)/1024); +} + +/* Compares number of total network rx bytes of two domains, returning -1,0,1 for + * <,=,> */ +static int compare_net_rx(xenstat_domain *domain1, xenstat_domain *domain2) +{ + return -compare(tot_net_bytes(domain1, TRUE), + tot_net_bytes(domain2, TRUE)); +} + +/* Prints number of total network rx bytes statistic */ +static void print_net_rx(xenstat_domain *domain) +{ + print("%8llu", tot_net_bytes(domain, TRUE)/1024); +} + +/* Gets number of total network bytes statistic, if rx true, then rx bytes + * otherwise tx bytes + */ +static unsigned long long tot_net_bytes(xenstat_domain *domain, int rx_flag) +{ + int i = 0; + xenstat_network *network; + unsigned num_networks = 0; + unsigned long long total = 0; + + /* How many networks? 
*/ + num_networks = xenstat_domain_num_networks(domain); + + /* Dump information for each network */ + for (i=0; i < num_networks; i++) { + /* Next get the network information */ + network = xenstat_domain_network(domain,i); + if (rx_flag) + total += xenstat_network_rbytes(network); + else + total += xenstat_network_tbytes(network); + } + return (total); +} + +/* Compares security id (ssid) of two domains, returning -1,0,1 for <,=,> */ +static int compare_ssid(xenstat_domain *domain1, xenstat_domain *domain2) +{ + return compare(xenstat_domain_ssid(domain1), + xenstat_domain_ssid(domain2)); +} + +/* Prints ssid statistic */ +static void print_ssid(xenstat_domain *domain) +{ + print("%4u", xenstat_domain_ssid(domain)); +} + +/* Section printing functions */ +/* Prints the top summary, above the domain table */ +void do_summary(void) +{ +#define TIME_STR_LEN 9 + const char *TIME_STR_FORMAT = "%H:%M:%S"; + char time_str[TIME_STR_LEN]; + unsigned run = 0, block = 0, pause = 0, + crash = 0, dying = 0, shutdown = 0; + unsigned i, num_domains = 0; + unsigned long long used = 0; + xenstat_domain *domain; + + /* Print program name, current time, and number of domains */ + strftime(time_str, TIME_STR_LEN, TIME_STR_FORMAT, + localtime(&curtime.tv_sec)); + num_domains = xenstat_node_num_domains(cur_node); + print("xentop - %s\n", time_str); + + /* Tabulate what states domains are in for summary */ + for (i=0; i < num_domains; i++) { + domain = xenstat_node_domain_by_index(cur_node,i); + if (xenstat_domain_running(domain)) run++; + else if (xenstat_domain_blocked(domain)) block++; + else if (xenstat_domain_paused(domain)) pause++; + else if (xenstat_domain_shutdown(domain)) shutdown++; + else if (xenstat_domain_crashed(domain)) crash++; + else if (xenstat_domain_dying(domain)) dying++; + } + + print("%u domains: %u running, %u blocked, %u paused, " + "%u crashed, %u dying, %u shutdown \n", + num_domains, run, block, pause, crash, dying, shutdown); + + used = 
xenstat_node_tot_mem(cur_node)-xenstat_node_free_mem(cur_node); + + /* Dump node memory and cpu information */ + print("Mem: %lluk total, %lluk used, %lluk free " + "CPUs: %u @ %lluMHz\n", + xenstat_node_tot_mem(cur_node)/1024, used/1024, + xenstat_node_free_mem(cur_node)/1024, + xenstat_node_num_cpus(cur_node), + xenstat_node_cpu_hz(cur_node)/1000000); +} + +/* Display the top header for the domain table */ +void do_header(void) +{ + field_id i; + + /* Turn on REVERSE highlight attribute for headings */ + attron(A_REVERSE); + for(i = 0; i < NUM_FIELDS; i++) { + if(i != 0) + print(" "); + /* The BOLD attribute is turned on for the sort column */ + if(i == sort_field) + attron(A_BOLD); + print("%*s", fields[i].default_width, fields[i].header); + if(i == sort_field) + attroff(A_BOLD); + } + attroff(A_REVERSE); + print("\n"); +} + +/* Displays bottom status line or current prompt */ +void do_bottom_line(void) +{ + move(lines()-1, 2); + + if (prompt != NULL) { + printw("%s: %s", prompt, prompt_val); + } else { + addch(A_REVERSE | 'D'); addstr("elay "); + + /* network */ + addch(A_REVERSE | 'N'); + attr_addstr(show_networks ? COLOR_PAIR(1) : 0, "etworks"); + addstr(" "); + + /* vcpus */ + addch(A_REVERSE | 'V'); + attr_addstr(show_vcpus ? COLOR_PAIR(1) : 0, "CPUs"); + addstr(" "); + + /* repeat */ + addch(A_REVERSE | 'R'); + attr_addstr(repeat_header ? 
COLOR_PAIR(1) : 0, "epeat header"); + addstr(" "); + + /* sort order */ + addch(A_REVERSE | 'S'); addstr("ort order "); + + addch(A_REVERSE | 'Q'); addstr("uit "); + } +} + +/* Prints Domain information */ +void do_domain(xenstat_domain *domain) +{ + unsigned int i; + for(i = 0; i < NUM_FIELDS; i++) { + if(i != 0) + print(" "); + if(i == sort_field) + attron(A_BOLD); + fields[i].print(domain); + if(i == sort_field) + attroff(A_BOLD); + } + print("\n"); +} + +/* Output all vcpu information */ +void do_vcpu(xenstat_domain *domain) +{ + int i = 0; + unsigned num_vcpus = 0; + xenstat_vcpu *vcpu; + + print("VCPUs(sec): "); + + num_vcpus = xenstat_domain_num_vcpus(domain); + + /* for all vcpus dump out values */ + for (i=0; i< num_vcpus; i++) { + vcpu = xenstat_domain_vcpu(domain,i); + + if (i != 0 && (i%5)==0) + print("\n "); + print(" %2u: %10llus", i, xenstat_vcpu_ns(vcpu)/1000000000); + } + print("\n"); +} + +/* Output all network information */ +void do_network(xenstat_domain *domain) +{ + int i = 0; + xenstat_network *network; + unsigned num_networks = 0; + + /* How many networks? 
*/ + num_networks = xenstat_domain_num_networks(domain); + + /* Dump information for each network */ + for (i=0; i < num_networks; i++) { + /* Next get the network information */ + network = xenstat_domain_network(domain,i); + + print("Net%d RX: %8llubytes %8llupkts %8lluerr %8lludrop ", + i, + xenstat_network_rbytes(network), + xenstat_network_rpackets(network), + xenstat_network_rerrs(network), + xenstat_network_rdrop(network)); + + print("TX: %8llubytes %8llupkts %8lluerr %8lludrop\n", + xenstat_network_tbytes(network), + xenstat_network_tpackets(network), + xenstat_network_terrs(network), + xenstat_network_tdrop(network)); + } +} + +static void top(void) +{ + xenstat_domain **domains; + unsigned int i, num_domains = 0; + + /* Now get the node information */ + if (prev_node != NULL) + xenstat_free_node(prev_node); + prev_node = cur_node; + cur_node = xenstat_get_node(xhandle, XENSTAT_ALL); + if (cur_node == NULL) + fail("Failed to retrieve statistics from libxenstat\n"); + + /* dump summary top information */ + do_summary(); + + /* Count the number of domains for which to report data */ + num_domains = xenstat_node_num_domains(cur_node); + + domains = malloc(num_domains*sizeof(xenstat_domain *)); + if(domains == NULL) + fail("Failed to allocate memory\n"); + + for (i=0; i < num_domains; i++) + domains[i] = xenstat_node_domain_by_index(cur_node, i); + + /* Sort */ + qsort(domains, num_domains, sizeof(xenstat_domain *), + (int(*)(const void *, const void *))compare_domains); + + if(first_domain_index >= num_domains) + first_domain_index = num_domains-1; + + for (i = first_domain_index; i < num_domains; i++) { + if(current_row() == lines()-1) + break; + if (i == first_domain_index || repeat_header) + do_header(); + do_domain(domains[i]); + if (show_vcpus) + do_vcpu(domains[i]); + if (show_networks) + do_network(domains[i]); + } + + do_bottom_line(); +} + +int main(int argc, char **argv) +{ + int opt, optind = 0; + int ch = ERR; + + struct option lopts[] = { + { 
"help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { "networks", no_argument, NULL, 'n' }, + { "repeat-header", no_argument, NULL, 'r' }, + { "vcpus", no_argument, NULL, 'v' }, + { "delay", required_argument, NULL, 'd' }, + { 0, 0, 0, 0 }, + }; + const char *sopts = "hVbnvd:"; + + if (atexit(cleanup) != 0) + fail("Failed to install cleanup handler.\n"); + + while ((opt = getopt_long(argc, argv, sopts, lopts, &optind)) != -1) { + switch (opt) { + case 'h': + case '?': + default: + usage(argv[0]); + exit(0); + case 'V': + version(); + exit(0); + case 'n': + show_networks = 1; + break; + case 'r': + repeat_header = 1; + break; + case 'v': + show_vcpus = 1; + break; + case 'd': + delay = atoi(optarg); + break; + } + } + + /* Get xenstat handle */ + xhandle = xenstat_init(); + if (xhandle == NULL) + fail("Failed to initialize xenstat library\n"); + + /* Begin curses stuff */ + initscr(); + start_color(); + cbreak(); + noecho(); + nonl(); + keypad(stdscr, TRUE); + halfdelay(5); + use_default_colors(); + init_pair(1, -1, COLOR_YELLOW); + + do { + gettimeofday(&curtime, NULL); + if(ch != ERR || (curtime.tv_sec - oldtime.tv_sec) >= delay) { + clear(); + top(); + oldtime = curtime; + refresh(); + } + ch = getch(); + } while (handle_key(ch)); + + /* Cleanup occurs in cleanup(), so no work to do here. */ + + return 0; +} diff -r cd984b3478f6 -r cc5f88b719d0 linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c Mon Aug 22 18:37:48 2005 +++ /dev/null Tue Aug 23 19:03:21 2005 @@ -1,497 +0,0 @@ -/* - * arch/x86_64/mm/ioremap.c - * - * Re-map IO memory to kernel address space so that we can access it. 
- * This is needed for high PCI addresses that aren't mapped in the - * 640k-1MB IO memory area on PC's - * - * (C) Copyright 1995 1996 Linus Torvalds - */ - -#include <linux/vmalloc.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <asm/io.h> -#include <asm/fixmap.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> - -/* - * Reuse arch/xen/i396/mm/ioremap.c. Need to merge later - */ -#ifndef CONFIG_XEN_PHYSDEV_ACCESS - -void * __ioremap(unsigned long phys_addr, unsigned long size, - unsigned long flags) -{ - return NULL; -} - -void *ioremap_nocache (unsigned long phys_addr, unsigned long size) -{ - return NULL; -} - -void iounmap(volatile void __iomem *addr) -{ -} - -void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) -{ - return NULL; -} - -void __init bt_iounmap(void *addr, unsigned long size) -{ -} - -#else - -#if defined(__i386__) -/* - * Does @address reside within a non-highmem page that is local to this virtual - * machine (i.e., not an I/O page, nor a memory page belonging to another VM). - * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand - * why this works. - */ -static inline int is_local_lowmem(unsigned long address) -{ - extern unsigned long max_low_pfn; - unsigned long mfn = address >> PAGE_SHIFT; - unsigned long pfn = mfn_to_pfn(mfn); - return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn)); -} -#elif defined(__x86_64__) -/* - * - */ -static inline int is_local_lowmem(unsigned long address) -{ - return 0; -} -#endif - -/* - * Generic mapping function (not visible outside): - */ - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. - * - * NOTE! 
We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. - */ -void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) -{ - void __iomem * addr; - struct vm_struct * area; - unsigned long offset, last_addr; - domid_t domid = DOMID_IO; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= 0x0 && last_addr < 0x100000) - return isa_bus_to_virt(phys_addr); -#endif - - /* - * Don't allow anybody to remap normal RAM that we're using.. - */ - if (is_local_lowmem(phys_addr)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = bus_to_virt(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) - return NULL; - - domid = DOMID_LOCAL; - } - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr+1) - phys_addr; - - /* - * Ok, go for it.. 
- */ - area = get_vm_area(size, VM_IOREMAP | (flags << 20)); - if (!area) - return NULL; - area->phys_addr = phys_addr; - addr = (void __iomem *) area->addr; - if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr, - size, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_DIRTY | _PAGE_ACCESSED -#if defined(__x86_64__) - | _PAGE_USER -#endif - | flags), domid)) { - vunmap((void __force *) addr); - return NULL; - } - return (void __iomem *) (offset + (char __iomem *)addr); -} - - -/** - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In particular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - * - * Must be freed with iounmap. - */ - -void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) -{ - unsigned long last_addr; - void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD); - if (!p) - return p; - - /* Guaranteed to be > phys_addr, as per __ioremap() */ - last_addr = phys_addr + size - 1; - - if (is_local_lowmem(last_addr)) { - struct page *ppage = virt_to_page(bus_to_virt(phys_addr)); - unsigned long npages; - - phys_addr &= PAGE_MASK; - - /* This might overflow and become zero.. */ - last_addr = PAGE_ALIGN(last_addr); - - /* .. 
but that's ok, because modulo-2**n arithmetic will make - * the page-aligned "last - first" come out right. - */ - npages = (last_addr - phys_addr) >> PAGE_SHIFT; - - if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { - iounmap(p); - p = NULL; - } - global_flush_tlb(); - } - - return p; -} - -void iounmap(volatile void __iomem *addr) -{ - struct vm_struct *p; - if ((void __force *) addr <= high_memory) - return; -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) - return; -#endif - p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); - if (!p) { - printk("__iounmap: bad address %p\n", addr); - return; - } - - if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) { - /* p->size includes the guard page, but cpa doesn't like that */ - change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)), - (p->size - PAGE_SIZE) >> PAGE_SHIFT, - PAGE_KERNEL); - global_flush_tlb(); - } - kfree(p); -} - -#if defined(__i386__) -void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) -{ - unsigned long offset, last_addr; - unsigned int nrpages; - enum fixed_addresses idx; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= 0x0 && last_addr < 0x100000) - return isa_bus_to_virt(phys_addr); -#endif - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. - */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) - return NULL; - - /* - * Ok, go for it.. 
- */ - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - set_fixmap(idx, phys_addr); - phys_addr += PAGE_SIZE; - --idx; - --nrpages; - } - return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); -} - -void __init bt_iounmap(void *addr, unsigned long size) -{ - unsigned long virt_addr; - unsigned long offset; - unsigned int nrpages; - enum fixed_addresses idx; - - virt_addr = (unsigned long)addr; - if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) - return; -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) - return; -#endif - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; - - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - clear_fixmap(idx); - --idx; - --nrpages; - } -} -#endif /* defined(__i386__) */ - -#endif /* CONFIG_XEN_PHYSDEV_ACCESS */ - -/* These hacky macros avoid phys->machine translations. */ -#define __direct_pte(x) ((pte_t) { (x) } ) -#define __direct_mk_pte(page_nr,pgprot) \ - __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) -#define direct_mk_pte_phys(physpage, pgprot) \ - __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) - -static inline void direct_remap_area_pte(pte_t *pte, - unsigned long address, - unsigned long size, - mmu_update_t **v) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); - - do { - (*v)->ptr = virt_to_machine(pte); - (*v)++; - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); -} - -static inline int direct_remap_area_pmd(struct mm_struct *mm, - pmd_t *pmd, - unsigned long address, - unsigned long size, - mmu_update_t **v) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - if (address >= end) - BUG(); - do { - pte_t *pte = (mm == &init_mm) ? 
- pte_alloc_kernel(mm, pmd, address) : - pte_alloc_map(mm, pmd, address); - if (!pte) - return -ENOMEM; - direct_remap_area_pte(pte, address, end - address, v); - pte_unmap(pte); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -int __direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long size, - mmu_update_t *v) -{ - pgd_t * dir; - unsigned long end = address + size; - int error; - -#if defined(__i386__) - dir = pgd_offset(mm, address); -#elif defined (__x86_64) - dir = (mm == &init_mm) ? - pgd_offset_k(address): - pgd_offset(mm, address); -#endif - if (address >= end) - BUG(); - spin_lock(&mm->page_table_lock); - do { - pud_t *pud; - pmd_t *pmd; - - error = -ENOMEM; - pud = pud_alloc(mm, dir, address); - if (!pud) - break; - pmd = pmd_alloc(mm, pud, address); - if (!pmd) - break; - error = 0; - direct_remap_area_pmd(mm, pmd, address, end - address, &v); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - - } while (address && (address < end)); - spin_unlock(&mm->page_table_lock); - return error; -} - - -int direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long machine_addr, - unsigned long size, - pgprot_t prot, - domid_t domid) -{ - int i; - unsigned long start_address; -#define MAX_DIRECTMAP_MMU_QUEUE 130 - mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u; - - start_address = address; - - flush_cache_all(); - - for (i = 0; i < size; i += PAGE_SIZE) { - if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) { - /* Fill in the PTE pointers. */ - __direct_remap_area_pages(mm, - start_address, - address-start_address, - u); - - if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) - return -EFAULT; - v = u; - start_address = address; - } - - /* - * Fill in the machine address: PTE ptr is done later by - * __direct_remap_area_pages(). 
- */ - v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot); - - machine_addr += PAGE_SIZE; - address += PAGE_SIZE; - v++; - } - - if (v != u) { - /* get the ptep's filled in */ - __direct_remap_area_pages(mm, - start_address, - address-start_address, - u); - if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)) - return -EFAULT; - } - - flush_tlb_all(); - - return 0; -} - -EXPORT_SYMBOL(direct_remap_area_pages); - -int create_lookup_pte_addr(struct mm_struct *mm, - unsigned long address, - unsigned long *ptep) -{ - int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) - { - unsigned long *ptep = (unsigned long *)data; - if (ptep) *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT) - | ((unsigned long)pte & ~PAGE_MASK); - return 0; - } - - return generic_page_range(mm, address, PAGE_SIZE, f, ptep); -} - -EXPORT_SYMBOL(create_lookup_pte_addr); - -int touch_pte_range(struct mm_struct *mm, - unsigned long address, - unsigned long size) -{ - int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) - { - return 0; - } - - return generic_page_range(mm, address, size, f, NULL); -} - -EXPORT_SYMBOL(touch_pte_range); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |