Xen project Mailing List

[Xen-changelog] Merge.

From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>

Date: Wed, 31 Aug 2005 08:31:25 +0000

Delivery-date: Wed, 31 Aug 2005 08:59:01 +0000

List-id: BK change log <xen-changelog.lists.xensource.com>

# HG changeset patch # User adsharma@xxxxxxxxxxxxxxxxxxxx # Node ID a698bd49931b7c65d17676d25e8783dd274c1433 # Parent dfaf788ab18cdd92f626380ddd97a64fa92abbcd # Parent b3785cbb723b8b355c1282232de0bd1cfbfb3556 Merge. diff -r dfaf788ab18c -r a698bd49931b linux-2.4-xen-sparse/mkbuildtree --- a/linux-2.4-xen-sparse/mkbuildtree Fri Aug 26 20:47:16 2005 +++ b/linux-2.4-xen-sparse/mkbuildtree Mon Aug 29 20:19:24 2005 @@ -102,9 +102,9 @@ relative_lndir ${RS} rm -f mkbuildtree -set ${RS}/../linux-2.6-xen-sparse -[ "$1" == "${RS}/../linux-2.6-xen-sparse" ] && { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; } -LINUX_26="$1" +LINUX_26=${RS}/../linux-2.6-xen-sparse +[ -d $LINUX_26 ] || { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; } + # Create links to the shared definitions of the Xen interfaces. rm -rf ${AD}/include/asm-xen/xen-public diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Mon Aug 29 20:19:24 2005 @@ -807,8 +807,8 @@ direct access method and falls back to the BIOS if that doesn't work. If unsure, go with the default, which is "Any". -config PCI_GOBIOS - bool "BIOS" +#config PCI_GOBIOS +# bool "BIOS" config PCI_GOMMCONFIG bool "MMConfig" @@ -821,10 +821,10 @@ endchoice -config PCI_BIOS - bool - depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) - default y +#config PCI_BIOS +# bool +# depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) +# default y config PCI_DIRECT bool diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Mon Aug 29 20:19:24 2005 @@ -610,7 +610,7 @@ acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode; #endif -#ifdef CONFIG_X86_PM_TIMER +#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN) /* detect the location of the ACPI PM Timer */ if (fadt->revision >= FADT2_REVISION_ID) { /* FADT rev. 2 */ diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Mon Aug 29 20:19:24 2005 @@ -135,6 +135,10 @@ * low exit latency (ie sit in a loop waiting for * somebody to say that they'd like to reschedule) */ +#ifdef CONFIG_SMP +extern void smp_suspend(void); +extern void smp_resume(void); +#endif void cpu_idle (void) { int cpu = _smp_processor_id(); @@ -149,6 +153,9 @@ if (cpu_is_offline(cpu)) { local_irq_disable(); +#ifdef CONFIG_SMP + smp_suspend(); +#endif #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU) /* Ack it. From this point on until we get woken up, we're not allowed @@ -159,6 +166,9 @@ HYPERVISOR_vcpu_down(cpu); #endif play_dead(); +#ifdef CONFIG_SMP + smp_resume(); +#endif local_irq_enable(); } @@ -789,10 +799,3 @@ sp -= get_random_int() % 8192; return sp & ~0xf; } - - -#ifndef CONFIG_X86_SMP -void _restore_vcpu(void) -{ -} -#endif diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Mon Aug 29 20:19:24 2005 @@ -1601,32 +1601,71 @@ void smp_suspend(void) { - /* XXX todo: take down time and ipi's on all cpus */ local_teardown_timer_irq(); smp_intr_exit(); } void smp_resume(void) { - /* XXX todo: restore time and ipi's on all cpus */ smp_intr_init(); local_setup_timer_irq(); } -DECLARE_PER_CPU(int, timer_irq); - -void _restore_vcpu(void) -{ - int cpu = smp_processor_id(); - extern atomic_t vcpus_rebooting; - - /* We are the first thing the vcpu runs when it comes back, - and we are supposed to restore the IPIs and timer - interrupts etc. When we return, the vcpu's idle loop will - start up again. */ - _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu)); - _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu)); - _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) ); - _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) ); +static atomic_t vcpus_rebooting; + +static void restore_vcpu_ready(void) +{ + atomic_dec(&vcpus_rebooting); } + +void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) +{ + int r; + int gdt_pages; + r = HYPERVISOR_vcpu_pickle(vcpu, ctxt); + if (r != 0) + panic("pickling vcpu %d -> %d!\n", vcpu, r); + + /* Translate from machine to physical addresses where necessary, + so that they can be translated to our new machine address space + after resume. libxc is responsible for doing this to vcpu0, + but we do it to the others. */ + gdt_pages = (ctxt->gdt_ents + 511) / 512; + ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]); + for (r = 0; r < gdt_pages; r++) + ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]); +} + +int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) +{ + int r; + int gdt_pages = (ctxt->gdt_ents + 511) / 512; + + /* This is kind of a hack, and implicitly relies on the fact that + the vcpu stops in a place where all of the call clobbered + registers are already dead. */ + ctxt->user_regs.esp -= 4; + ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip; + ctxt->user_regs.eip = (unsigned long)restore_vcpu_ready; + + /* De-canonicalise. libxc handles this for vcpu 0, but we need + to do it for the other vcpus. */ + ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]); + for (r = 0; r < gdt_pages; r++) + ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]); + + atomic_set(&vcpus_rebooting, 1); + r = HYPERVISOR_boot_vcpu(vcpu, ctxt); + if (r != 0) { + printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r); + return -1; + } + + /* Make sure we wait for the new vcpu to come up before trying to do + anything with it or starting the next one. */ + while (atomic_read(&vcpus_rebooting)) + barrier(); + + return 0; +} diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/i386/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Mon Aug 29 20:19:24 2005 @@ -219,6 +219,8 @@ } } +#ifndef CONFIG_XEN + static inline int page_kills_ppro(unsigned long pagenr) { if (pagenr >= 0x70000 && pagenr <= 0x7003F) @@ -265,6 +267,13 @@ } return 0; } + +#else /* CONFIG_XEN */ + +#define page_kills_ppro(p) 0 +#define page_is_ram(p) 1 + +#endif #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile --- a/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Mon Aug 29 20:19:24 2005 @@ -4,7 +4,7 @@ c-obj-y := i386.o -c-obj-$(CONFIG_PCI_BIOS) += pcbios.o +#c-obj-$(CONFIG_PCI_BIOS) += pcbios.o c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o c-obj-$(CONFIG_PCI_DIRECT) += direct.o diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Mon Aug 29 20:19:24 2005 @@ -245,74 +245,6 @@ spin_unlock(&irq_mapping_update_lock); } -/* This is only used when a vcpu from an xm save. The ipi is expected - to have been bound before we suspended, and so all of the xenolinux - state is set up; we only need to restore the Xen side of things. - The irq number has to be the same, but the evtchn number can - change. */ -void _bind_ipi_to_irq(int ipi, int vcpu, int irq) -{ - evtchn_op_t op; - int evtchn; - - spin_lock(&irq_mapping_update_lock); - - op.cmd = EVTCHNOP_bind_ipi; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu); - evtchn = op.u.bind_ipi.port; - - printk("<0>IPI %d, old evtchn %d, evtchn %d.\n", - ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi], - evtchn); - - evtchn_to_irq[irq_to_evtchn[irq]] = -1; - irq_to_evtchn[irq] = -1; - - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - - printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn, - evtchn_to_irq[evtchn]); - per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn; - - bind_evtchn_to_cpu(evtchn, vcpu); - - spin_unlock(&irq_mapping_update_lock); - - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask); - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending); -} - -void _bind_virq_to_irq(int virq, int cpu, int irq) -{ - evtchn_op_t op; - int evtchn; - - spin_lock(&irq_mapping_update_lock); - - op.cmd = EVTCHNOP_bind_virq; - op.u.bind_virq.virq = virq; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to bind virtual IRQ %d\n", virq); - evtchn = op.u.bind_virq.port; - - evtchn_to_irq[irq_to_evtchn[irq]] = -1; - irq_to_evtchn[irq] = -1; - - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - - per_cpu(virq_to_irq, cpu)[virq] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - - spin_unlock(&irq_mapping_update_lock); - - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask); - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending); -} - int bind_ipi_to_irq(int ipi) { evtchn_op_t op; diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/kernel/reboot.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Mon Aug 29 20:19:24 2005 @@ -65,66 +65,13 @@ #define cpu_up(x) (-EOPNOTSUPP) #endif -static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) -{ - int r; - int gdt_pages; - r = HYPERVISOR_vcpu_pickle(vcpu, ctxt); - if (r != 0) - panic("pickling vcpu %d -> %d!\n", vcpu, r); - - /* Translate from machine to physical addresses where necessary, - so that they can be translated to our new machine address space - after resume. libxc is responsible for doing this to vcpu0, - but we do it to the others. */ - gdt_pages = (ctxt->gdt_ents + 511) / 512; - ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]); - for (r = 0; r < gdt_pages; r++) - ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]); -} - -void _restore_vcpu(int cpu); - -atomic_t vcpus_rebooting; - -static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) -{ - int r; - int gdt_pages = (ctxt->gdt_ents + 511) / 512; - - /* This is kind of a hack, and implicitly relies on the fact that - the vcpu stops in a place where all of the call clobbered - registers are already dead. */ - ctxt->user_regs.esp -= 4; - ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip; - ctxt->user_regs.eip = (unsigned long)_restore_vcpu; - - /* De-canonicalise. libxc handles this for vcpu 0, but we need - to do it for the other vcpus. */ - ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]); - for (r = 0; r < gdt_pages; r++) - ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]); - - atomic_set(&vcpus_rebooting, 1); - r = HYPERVISOR_boot_vcpu(vcpu, ctxt); - if (r != 0) { - printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r); - return -1; - } - - /* Make sure we wait for the new vcpu to come up before trying to do - anything with it or starting the next one. */ - while (atomic_read(&vcpus_rebooting)) - barrier(); - - return 0; -} +#ifdef CONFIG_SMP +#endif static int __do_suspend(void *ignore) { int i, j; suspend_record_t *suspend_record; - static vcpu_guest_context_t suspended_cpu_records[NR_CPUS]; /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */ /* XXX SMH: yes it would :-( */ @@ -138,16 +85,22 @@ extern int gnttab_suspend(void); extern int gnttab_resume(void); -#ifdef CONFIG_SMP - extern void smp_suspend(void); - extern void smp_resume(void); -#endif extern void time_suspend(void); extern void time_resume(void); extern unsigned long max_pfn; extern unsigned int *pfn_to_mfn_frame_list; +#ifdef CONFIG_SMP + extern void smp_suspend(void); + extern void smp_resume(void); + + static vcpu_guest_context_t suspended_cpu_records[NR_CPUS]; cpumask_t prev_online_cpus, prev_present_cpus; + + void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt); + int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt); +#endif + int err = 0; BUG_ON(smp_processor_id() != 0); @@ -164,6 +117,8 @@ if ( suspend_record == NULL ) goto out; + preempt_disable(); +#ifdef CONFIG_SMP /* Take all of the other cpus offline. We need to be careful not to get preempted between the final test for num_online_cpus() == 1 and disabling interrupts, since otherwise userspace could @@ -175,7 +130,6 @@ since by the time num_online_cpus() == 1, there aren't any other cpus) */ cpus_clear(prev_online_cpus); - preempt_disable(); while (num_online_cpus() > 1) { preempt_enable(); for_each_online_cpu(i) { @@ -190,6 +144,7 @@ } preempt_disable(); } +#endif suspend_record->nr_pfns = max_pfn; /* final number of pfns */ @@ -197,6 +152,7 @@ preempt_enable(); +#ifdef CONFIG_SMP cpus_clear(prev_present_cpus); for_each_present_cpu(i) { if (i == 0) @@ -204,6 +160,7 @@ save_vcpu_context(i, &suspended_cpu_records[i]); cpu_set(i, prev_present_cpus); } +#endif #ifdef __i386__ mm_pin_all(); @@ -269,12 +226,14 @@ usbif_resume(); - for_each_cpu_mask(i, prev_present_cpus) { +#ifdef CONFIG_SMP + for_each_cpu_mask(i, prev_present_cpus) restore_vcpu_context(i, &suspended_cpu_records[i]); - } +#endif __sti(); +#ifdef CONFIG_SMP out_reenable_cpus: for_each_cpu_mask(i, prev_online_cpus) { j = cpu_up(i); @@ -284,6 +243,7 @@ err = j; } } +#endif out: if ( suspend_record != NULL ) diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Mon Aug 29 20:19:24 2005 @@ -40,7 +40,7 @@ i386-obj-$(CONFIG_DUMMY_IOMMU) += pci-dma.o i386-obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o +#obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o c-obj-$(CONFIG_MODULES) += module.o diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Mon Aug 29 20:19:24 2005 @@ -1277,21 +1277,23 @@ void smp_suspend(void) { - /* XXX todo: take down time and ipi's on all cpus */ local_teardown_timer_irq(); smp_intr_exit(); } void smp_resume(void) { - /* XXX todo: restore time and ipi's on all cpus */ smp_intr_init(); local_setup_timer_irq(); } -void _restore_vcpu(void) -{ - /* XXX need to write this */ -} - -#endif +void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) +{ +} + +int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) +{ + return 0; +} + +#endif diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Mon Aug 29 20:19:24 2005 @@ -767,9 +767,6 @@ static inline int page_is_ram (unsigned long pagenr) { - if (pagenr < start_pfn || pagenr >= end_pfn) - return 0; - return 1; } diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Mon Aug 29 20:19:24 2005 @@ -295,10 +295,10 @@ /* React to a change in the target key */ static void watch_target(struct xenbus_watch *watch, const char *node) { - unsigned long new_target; + unsigned long long new_target; int err; - err = xenbus_scanf("memory", "target", "%lu", &new_target); + err = xenbus_scanf("memory", "target", "%llu", &new_target); if (err != 1) { printk(KERN_ERR "Unable to read memory/target\n"); return; diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Aug 29 20:19:24 2005 @@ -32,23 +32,15 @@ */ #if 1 -#define ASSERT(_p) \ - if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ - __LINE__, __FILE__); *(int*)0=0; } +#define ASSERT(p) \ + if (!(p)) { printk("Assertion '%s' failed, line %d, file %s", #p , \ + __LINE__, __FILE__); *(int*)0=0; } #else #define ASSERT(_p) #endif #include <linux/version.h> - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) #include "block.h" -#else -#include "common.h" -#include <linux/blk.h> -#include <linux/tqueue.h> -#endif - #include <linux/cdrom.h> #include <linux/sched.h> #include <linux/interrupt.h> @@ -58,90 +50,56 @@ #include <asm-xen/xen-public/grant_table.h> #include <asm-xen/gnttab.h> -typedef unsigned char byte; /* from linux/ide.h */ - -/* Control whether runtime update of vbds is enabled. */ -#define ENABLE_VBD_UPDATE 1 - #define BLKIF_STATE_DISCONNECTED 0 #define BLKIF_STATE_CONNECTED 1 static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED; - -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) #define GRANTREF_INVALID (1<<15) -static struct blk_shadow { - blkif_request_t req; - unsigned long request; - unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -} blk_shadow[BLK_RING_SIZE]; -unsigned long blk_shadow_free; - static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */ static void kick_pending_request_queues(struct blkfront_info *info); -static int __init xlblk_init(void); - static void blkif_completion(struct blk_shadow *s); -static inline int GET_ID_FROM_FREELIST(void) -{ - unsigned long free = blk_shadow_free; - BUG_ON(free > BLK_RING_SIZE); - blk_shadow_free = blk_shadow[free].req.id; - blk_shadow[free].req.id = 0x0fffffee; /* debug */ - return free; -} - -static inline void ADD_ID_TO_FREELIST(unsigned long id) -{ - blk_shadow[id].req.id = blk_shadow_free; - blk_shadow[id].request = 0; - blk_shadow_free = id; -} - - -/************************ COMMON CODE (inlined) ************************/ - -/* Kernel-specific definitions used in the common code */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define DISABLE_SCATTERGATHER() -#else -static int sg_operation = -1; -#define DISABLE_SCATTERGATHER() (sg_operation = -1) -#endif +static inline int GET_ID_FROM_FREELIST( + struct blkfront_info *info) +{ + unsigned long free = info->shadow_free; + BUG_ON(free > BLK_RING_SIZE); + info->shadow_free = info->shadow[free].req.id; + info->shadow[free].req.id = 0x0fffffee; /* debug */ + return free; +} + +static inline void ADD_ID_TO_FREELIST( + struct blkfront_info *info, unsigned long id) +{ + info->shadow[id].req.id = info->shadow_free; + info->shadow[id].request = 0; + info->shadow_free = id; +} static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r) { - s->req = *r; + s->req = *r; } static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s) { - *r = s->req; -} - + *r = s->req; +} static inline void flush_requests(struct blkfront_info *info) { - DISABLE_SCATTERGATHER(); - RING_PUSH_REQUESTS(&info->ring); - notify_via_evtchn(info->evtchn); -} - - -/************************** KERNEL VERSION 2.6 **************************/ - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - -module_init(xlblk_init); + RING_PUSH_REQUESTS(&info->ring); + notify_via_evtchn(info->evtchn); +} static void kick_pending_request_queues(struct blkfront_info *info) { @@ -169,50 +127,44 @@ int blkif_open(struct inode *inode, struct file *filep) { - // struct gendisk *gd = inode->i_bdev->bd_disk; - // struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; - - /* Update of usage count is protected by per-device semaphore. */ - // di->mi->usage++; - return 0; } int blkif_release(struct inode *inode, struct file *filep) { - /* FIXME: This is where we can actually free up majors, etc. --RR */ - return 0; + return 0; } int blkif_ioctl(struct inode *inode, struct file *filep, unsigned command, unsigned long argument) { - int i; - - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long)argument, inode->i_rdev); - - switch ( command ) - { - case HDIO_GETGEO: - /* return ENOSYS to use defaults */ - return -ENOSYS; - - case CDROMMULTISESSION: - DPRINTK("FIXME: support multisession CDs later\n"); - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; - return 0; - - default: - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", - command);*/ - return -EINVAL; /* same return as native Linux */ - } - - return 0; + int i; + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", + command, (long)argument, inode->i_rdev); + + switch ( command ) + { + case HDIO_GETGEO: + /* return ENOSYS to use defaults */ + return -ENOSYS; + + case CDROMMULTISESSION: + DPRINTK("FIXME: support multisession CDs later\n"); + for (i = 0; i < sizeof(struct cdrom_multisession); i++) + if (put_user(0, (char *)(argument + i))) + return -EFAULT; + return 0; + + default: + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", + command);*/ + return -EINVAL; /* same return as native Linux */ + } + + return 0; } @@ -228,76 +180,77 @@ */ static int blkif_queue_request(struct request *req) { - struct blkfront_info *info = req->rq_disk->private_data; - unsigned long buffer_ma; - blkif_request_t *ring_req; - struct bio *bio; - struct bio_vec *bvec; - int idx; - unsigned long id; - unsigned int fsect, lsect; - int ref; - grant_ref_t gref_head; - - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) - return 1; - - if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST, - &gref_head) < 0) { - gnttab_request_free_callback(&info->callback, - blkif_restart_queue_callback, info, - BLKIF_MAX_SEGMENTS_PER_REQUEST); - return 1; - } - - /* Fill out a communications ring structure. */ - ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); - id = GET_ID_FROM_FREELIST(); - blk_shadow[id].request = (unsigned long)req; - - ring_req->id = id; - ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - ring_req->sector_number = (blkif_sector_t)req->sector; - ring_req->handle = info->handle; - - ring_req->nr_segments = 0; - rq_for_each_bio(bio, req) - { - bio_for_each_segment(bvec, bio, idx) - { - if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST ) - BUG(); - buffer_ma = page_to_phys(bvec->bv_page); - fsect = bvec->bv_offset >> 9; - lsect = fsect + (bvec->bv_len >> 9) - 1; - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( - ref, - info->backend_id, - buffer_ma >> PAGE_SHIFT, - rq_data_dir(req) ); - - blk_shadow[id].frame[ring_req->nr_segments] = - buffer_ma >> PAGE_SHIFT; - - ring_req->frame_and_sects[ring_req->nr_segments] = - blkif_fas_from_gref(ref, fsect, lsect); - - ring_req->nr_segments++; - } - } - - info->ring.req_prod_pvt++; - - /* Keep a private copy so we can reissue requests when recovering. */ - pickle_request(&blk_shadow[id], ring_req); - - gnttab_free_grant_references(gref_head); - - return 0; + struct blkfront_info *info = req->rq_disk->private_data; + unsigned long buffer_ma; + blkif_request_t *ring_req; + struct bio *bio; + struct bio_vec *bvec; + int idx; + unsigned long id; + unsigned int fsect, lsect; + int ref; + grant_ref_t gref_head; + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + return 1; + + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { + gnttab_request_free_callback( + &info->callback, + blkif_restart_queue_callback, + info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + + /* Fill out a communications ring structure. */ + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + id = GET_ID_FROM_FREELIST(info); + info->shadow[id].request = (unsigned long)req; + + ring_req->id = id; + ring_req->operation = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + ring_req->sector_number = (blkif_sector_t)req->sector; + ring_req->handle = info->handle; + + ring_req->nr_segments = 0; + rq_for_each_bio (bio, req) { + bio_for_each_segment (bvec, bio, idx) { + BUG_ON(ring_req->nr_segments + == BLKIF_MAX_SEGMENTS_PER_REQUEST); + buffer_ma = page_to_phys(bvec->bv_page); + fsect = bvec->bv_offset >> 9; + lsect = fsect + (bvec->bv_len >> 9) - 1; + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head); + ASSERT(ref != -ENOSPC); + + gnttab_grant_foreign_access_ref( + ref, + info->backend_id, + buffer_ma >> PAGE_SHIFT, + rq_data_dir(req) ); + + info->shadow[id].frame[ring_req->nr_segments] = + buffer_ma >> PAGE_SHIFT; + + ring_req->frame_and_sects[ring_req->nr_segments] = + blkif_fas_from_gref(ref, fsect, lsect); + + ring_req->nr_segments++; + } + } + + info->ring.req_prod_pvt++; + + /* Keep a private copy so we can reissue requests when recovering. */ + pickle_request(&info->shadow[id], ring_req); + + gnttab_free_grant_references(gref_head); + + return 0; } /* @@ -306,756 +259,197 @@ */ void do_blkif_request(request_queue_t *rq) { - struct blkfront_info *info = NULL; - struct request *req; - int queued; - - DPRINTK("Entered do_blkif_request\n"); - - queued = 0; - - while ( (req = elv_next_request(rq)) != NULL ) - { - info = req->rq_disk->private_data; - - if ( !blk_fs_request(req) ) - { - end_request(req, 0); - continue; - } - - if (RING_FULL(&info->ring)) - goto wait; - - DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n", - req, req->cmd, req->sector, req->current_nr_sectors, - req->nr_sectors, req->buffer, - rq_data_dir(req) ? "write" : "read"); - - blkdev_dequeue_request(req); - if (blkif_queue_request(req)) { - blk_requeue_request(rq, req); - wait: - /* Avoid pointless unplugs. */ - blk_stop_queue(rq); - break; - } - - queued++; - } - - if ( queued != 0 ) - flush_requests(info); + struct blkfront_info *info = NULL; + struct request *req; + int queued; + + DPRINTK("Entered do_blkif_request\n"); + + queued = 0; + + while ((req = elv_next_request(rq)) != NULL) { + info = req->rq_disk->private_data; + + if (!blk_fs_request(req)) { + end_request(req, 0); + continue; + } + + if (RING_FULL(&info->ring)) + goto wait; + + DPRINTK("do_blk_req %p: cmd %p, sec %lx, " + "(%u/%li) buffer:%p [%s]\n", + req, req->cmd, req->sector, req->current_nr_sectors, + req->nr_sectors, req->buffer, + rq_data_dir(req) ? "write" : "read"); + + blkdev_dequeue_request(req); + if (blkif_queue_request(req)) { + blk_requeue_request(rq, req); + wait: + /* Avoid pointless unplugs. */ + blk_stop_queue(rq); + break; + } + + queued++; + } + + if (queued != 0) + flush_requests(info); } static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) { - struct request *req; - blkif_response_t *bret; - RING_IDX i, rp; - unsigned long flags; - struct blkfront_info *info = (struct blkfront_info *)dev_id; - - spin_lock_irqsave(&blkif_io_lock, flags); - - if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { - spin_unlock_irqrestore(&blkif_io_lock, flags); - return IRQ_HANDLED; - } - - rp = info->ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ - - for ( i = info->ring.rsp_cons; i != rp; i++ ) - { - unsigned long id; - - bret = RING_GET_RESPONSE(&info->ring, i); - id = bret->id; - req = (struct request *)blk_shadow[id].request; - - blkif_completion(&blk_shadow[id]); - - ADD_ID_TO_FREELIST(id); - - switch ( bret->operation ) - { - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) - DPRINTK("Bad return from blkdev data request: %x\n", - bret->status); - - if ( unlikely(end_that_request_first - (req, - (bret->status == BLKIF_RSP_OKAY), - req->hard_nr_sectors)) ) - BUG(); - end_that_request_last(req); - - break; - default: - BUG(); - } - } - - info->ring.rsp_cons = i; - - kick_pending_request_queues(info); - - spin_unlock_irqrestore(&blkif_io_lock, flags); - - return IRQ_HANDLED; -} - -#else -/************************** KERNEL VERSION 2.4 **************************/ - -static kdev_t sg_dev; -static unsigned long sg_next_sect; - -/* - * Request queues with outstanding work, but ring is currently full. - * We need no special lock here, as we always access this with the - * blkif_io_lock held. We only need a small maximum list. - */ -#define MAX_PENDING 8 -static request_queue_t *pending_queues[MAX_PENDING]; -static int nr_pending; - - -#define blkif_io_lock io_request_lock - -/*============================================================================*/ -static void kick_pending_request_queues(void) -{ - /* We kick pending request queues if the ring is reasonably empty. */ - if ( (nr_pending != 0) && - (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) ) - { - /* Attempt to drain the queue, but bail if the ring becomes full. */ - while ( (nr_pending != 0) && !RING_FULL(&info->ring) ) - do_blkif_request(pending_queues[--nr_pending]); - } -} - -int blkif_open(struct inode *inode, struct file *filep) -{ - short xldev = inode->i_rdev; - struct gendisk *gd = get_gendisk(xldev); - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - short minor = MINOR(xldev); - - if ( gd->part[minor].nr_sects == 0 ) - { - /* - * Device either doesn't exist, or has zero capacity; we use a few - * cheesy heuristics to return the relevant error code - */ - if ( (gd->sizes[minor >> gd->minor_shift] != 0) || - ((minor & (gd->max_p - 1)) != 0) ) - { - /* - * We have a real device, but no such partition, or we just have a - * partition number so guess this is the problem. - */ - return -ENXIO; /* no such device or address */ - } - else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) - { - /* This is a removable device => assume that media is missing. */ - return -ENOMEDIUM; /* media not present (this is a guess) */ - } - else - { - /* Just go for the general 'no such device' error. */ - return -ENODEV; /* no such device */ - } - } - - /* Update of usage count is protected by per-device semaphore. */ - disk->usage++; - - return 0; -} - - -int blkif_release(struct inode *inode, struct file *filep) -{ - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - - /* - * When usage drops to zero it may allow more VBD updates to occur. - * Update of usage count is protected by a per-device semaphore. - */ - if ( --disk->usage == 0 ) { - vbd_update(); - } - - return 0; -} - - -int blkif_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument) -{ - kdev_t dev = inode->i_rdev; - struct hd_geometry *geo = (struct hd_geometry *)argument; - struct gendisk *gd; - struct hd_struct *part; - int i; - unsigned short cylinders; - byte heads, sectors; - - /* NB. No need to check permissions. That is done for us. */ - - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long) argument, dev); - - gd = get_gendisk(dev); - part = &gd->part[MINOR(dev)]; - - switch ( command ) - { - case BLKGETSIZE: - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); - return put_user(part->nr_sects, (unsigned long *) argument); - - case BLKGETSIZE64: - DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, - (u64)part->nr_sects * 512); - return put_user((u64)part->nr_sects * 512, (u64 *) argument); - - case BLKRRPART: /* re-read partition table */ - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); - return blkif_revalidate(dev); - - case BLKSSZGET: - return hardsect_size[MAJOR(dev)][MINOR(dev)]; - - case BLKBSZGET: /* get block size */ - DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); - break; - - case BLKBSZSET: /* set block size */ - DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); - break; - - case BLKRASET: /* set read-ahead */ - DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); - break; - - case BLKRAGET: /* get read-ahead */ - DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); - break; - - case HDIO_GETGEO: - DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); - if (!argument) return -EINVAL; - - /* We don't have real geometry info, but let's at least return - values consistent with the size of the device */ - - heads = 0xff; - sectors = 0x3f; - cylinders = part->nr_sects / (heads * sectors); - - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT; - - return 0; - - case HDIO_GETGEO_BIG: - DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); - if (!argument) return -EINVAL; - - /* We don't have real geometry info, but let's at least return - values consistent with the size of the device */ - - heads = 0xff; - sectors = 0x3f; - cylinders = part->nr_sects / (heads * sectors); - - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT; - - return 0; - - case CDROMMULTISESSION: - DPRINTK("FIXME: support multisession CDs later\n"); - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; - return 0; - - case SCSI_IOCTL_GET_BUS_NUMBER: - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif"); - return -ENOSYS; - - default: - WPRINTK("ioctl %08x not supported by XL blkif\n", command); - return -ENOSYS; - } - - return 0; -} - - - -/* check media change: should probably do something here in some cases :-) */ -int blkif_check(kdev_t dev) -{ - DPRINTK("blkif_check\n"); - return 0; -} - -int blkif_revalidate(kdev_t dev) -{ - struct block_device *bd; - struct gendisk *gd; - xl_disk_t *disk; - unsigned long capacity; - int i, rc = 0; - - if ( (bd = bdget(dev)) == NULL ) - return -EINVAL; - - /* - * Update of partition info, and check of usage count, is protected - * by the per-block-device semaphore. - */ - down(&bd->bd_sem); - - if ( ((gd = get_gendisk(dev)) == NULL) || - ((disk = xldev_to_xldisk(dev)) == NULL) || - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) - { - rc = -EINVAL; - goto out; - } - - if ( disk->usage > 1 ) - { - rc = -EBUSY; - goto out; - } - - /* Only reread partition table if VBDs aren't mapped to partitions. */ - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) - { - for ( i = gd->max_p - 1; i >= 0; i-- ) - { - invalidate_device(dev+i, 1); - gd->part[MINOR(dev+i)].start_sect = 0; - gd->part[MINOR(dev+i)].nr_sects = 0; - gd->sizes[MINOR(dev+i)] = 0; - } - - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); - } - - out: - up(&bd->bd_sem); - bdput(bd); - return rc; -} - - -/* - * blkif_queue_request - * - * request block io - * - * id: for guest use only. - * operation: BLKIF_OP_{READ,WRITE,PROBE} - * buffer: buffer to read/write into. this should be a - * virtual address in the guest os. - */ -static int blkif_queue_request(unsigned long id, - int operation, - char * buffer, - unsigned long sector_number, - unsigned short nr_sectors, - kdev_t device, - blkif_vdev_t handle) -{ - unsigned long buffer_ma = virt_to_bus(buffer); - unsigned long xid; - struct gendisk *gd; - blkif_request_t *req; - struct buffer_head *bh; - unsigned int fsect, lsect; - int ref; - - fsect = (buffer_ma & ~PAGE_MASK) >> 9; - lsect = fsect + nr_sectors - 1; - - /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */ - if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) - BUG(); - if ( lsect > ((PAGE_SIZE/512)-1) ) - BUG(); - - buffer_ma &= PAGE_MASK; - - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) - return 1; - - switch ( operation ) - { - - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - gd = get_gendisk(device); - - /* - * Update the sector_number we'll pass down as appropriate; note that - * we could sanity check that resulting sector will be in this - * partition, but this will happen in driver backend anyhow. - */ - sector_number += gd->part[MINOR(device)].start_sect; - - /* - * If this unit doesn't consist of virtual partitions then we clear - * the partn bits from the device number. - */ - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & - GENHD_FL_VIRT_PARTNS) ) - device &= ~(gd->max_p - 1); - - if ( (sg_operation == operation) && - (sg_dev == device) && - (sg_next_sect == sector_number) ) - { - req = RING_GET_REQUEST(&info->ring, - info->ring.req_prod_pvt - 1); - bh = (struct buffer_head *)id; - - bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request; - blk_shadow[req->id].request = (unsigned long)id; - - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( - ref, - info->backend_id, - buffer_ma >> PAGE_SHIFT, - ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); - - blk_shadow[req->id].frame[req->nr_segments] = - buffer_ma >> PAGE_SHIFT; - - req->frame_and_sects[req->nr_segments] = - blkif_fas_from_gref(ref, fsect, lsect); - if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) - sg_next_sect += nr_sectors; - else - DISABLE_SCATTERGATHER(); - - /* Update the copy of the request in the recovery ring. */ - pickle_request(&blk_shadow[req->id], req ); - - return 0; - } - else if ( RING_FULL(&info->ring) ) - { - return 1; - } - else - { - sg_operation = operation; - sg_dev = device; - sg_next_sect = sector_number + nr_sectors; - } - break; - - default: - panic("unknown op %d\n", operation); - } - - /* Fill out a communications ring structure. */ - req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); - - xid = GET_ID_FROM_FREELIST(); - blk_shadow[xid].request = (unsigned long)id; - - req->id = xid; - req->operation = operation; - req->sector_number = (blkif_sector_t)sector_number; - req->handle = handle; - req->nr_segments = 1; - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( - ref, - info->backend_id, - buffer_ma >> PAGE_SHIFT, - ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); - - blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT; - - req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect); - - /* Keep a private copy so we can reissue requests when recovering. */ - pickle_request(&blk_shadow[xid], req); - - info->ring.req_prod_pvt++; - - return 0; -} - - -/* - * do_blkif_request - * read a block; request is in a request queue - */ -void do_blkif_request(request_queue_t *rq) -{ - struct request *req; - struct buffer_head *bh, *next_bh; - int rw, nsect, full, queued = 0; - - DPRINTK("Entered do_blkif_request\n"); - - while ( !rq->plugged && !list_empty(&rq->queue_head)) - { - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) - goto out; - - DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", - req, req->cmd, req->sector, - req->current_nr_sectors, req->nr_sectors, req->bh); - - rw = req->cmd; - if ( rw == READA ) - rw = READ; - if ( unlikely((rw != READ) && (rw != WRITE)) ) - panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); - - req->errors = 0; - - bh = req->bh; - while ( bh != NULL ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - - full = blkif_queue_request( - (unsigned long)bh, - (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, - bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); - - if ( full ) - { - bh->b_reqnext = next_bh; - pending_queues[nr_pending++] = rq; - if ( unlikely(nr_pending >= MAX_PENDING) ) - BUG(); - goto out; - } - - queued++; - - /* Dequeue the buffer head from the request. */ - nsect = bh->b_size >> 9; - bh = req->bh = next_bh; - - if ( bh != NULL ) - { - /* There's another buffer head to do. Update the request. */ - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; - req->buffer = bh->b_data; - } - else - { - /* That was the last buffer head. Finalise the request. */ - if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) - BUG(); - blkdev_dequeue_request(req); - end_that_request_last(req); - } - } - } - - out: - if ( queued != 0 ) - flush_requests(); -} - - -static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - RING_IDX i, rp; - unsigned long flags; - struct buffer_head *bh, *next_bh; - - spin_lock_irqsave(&io_request_lock, flags); - - if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) ) - { - spin_unlock_irqrestore(&io_request_lock, flags); - return; - } - - rp = info->ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ - - for ( i = info->ring.rsp_cons; i != rp; i++ ) - { - unsigned long id; - blkif_response_t *bret; - - bret = RING_GET_RESPONSE(&info->ring, i); - id = bret->id; - bh = (struct buffer_head *)blk_shadow[id].request; - - blkif_completion(&blk_shadow[id]); - - ADD_ID_TO_FREELIST(id); - - switch ( bret->operation ) - { - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) - DPRINTK("Bad return from blkdev data request: %lx\n", - bret->status); - for ( ; bh != NULL; bh = next_bh ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY); - } - - break; - case BLKIF_OP_PROBE: - memcpy(&blkif_control_rsp, bret, sizeof(*bret)); - blkif_control_rsp_valid = 1; - break; - default: - BUG(); - } - - } - info->ring.rsp_cons = i; - - kick_pending_request_queues(); - - spin_unlock_irqrestore(&io_request_lock, flags); -} - -#endif - -/***************************** COMMON CODE *******************************/ + struct request *req; + blkif_response_t *bret; + RING_IDX i, rp; + unsigned long flags; + struct blkfront_info *info = (struct blkfront_info *)dev_id; + + spin_lock_irqsave(&blkif_io_lock, flags); + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { + spin_unlock_irqrestore(&blkif_io_lock, flags); + return IRQ_HANDLED; + } + + rp = info->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + for (i = info->ring.rsp_cons; i != rp; i++) { + unsigned long id; + + bret = RING_GET_RESPONSE(&info->ring, i); + id = bret->id; + req = (struct request *)info->shadow[id].request; + + blkif_completion(&info->shadow[id]); + + ADD_ID_TO_FREELIST(info, id); + + switch (bret->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + if (unlikely(bret->status != BLKIF_RSP_OKAY)) + DPRINTK("Bad return from blkdev data " + "request: %x\n", bret->status); + + BUG_ON(end_that_request_first( + req, (bret->status == BLKIF_RSP_OKAY), + req->hard_nr_sectors)); + end_that_request_last(req); + break; + default: + BUG(); + } + } + + info->ring.rsp_cons = i; + + kick_pending_request_queues(info); + + spin_unlock_irqrestore(&blkif_io_lock, flags); + + return IRQ_HANDLED; +} static void blkif_free(struct blkfront_info *info) { - /* Prevent new requests being issued until we fix things up. */ - spin_lock_irq(&blkif_io_lock); - info->connected = BLKIF_STATE_DISCONNECTED; - spin_unlock_irq(&blkif_io_lock); - - /* Free resources associated with old device channel. */ - if ( info->ring.sring != NULL ) - { - free_page((unsigned long)info->ring.sring); - info->ring.sring = NULL; - } - unbind_evtchn_from_irqhandler(info->evtchn, NULL); - info->evtchn = 0; + /* Prevent new requests being issued until we fix things up. */ + spin_lock_irq(&blkif_io_lock); + info->connected = BLKIF_STATE_DISCONNECTED; + spin_unlock_irq(&blkif_io_lock); + + /* Free resources associated with old device channel. */ + if (info->ring.sring != NULL) { + free_page((unsigned long)info->ring.sring); + info->ring.sring = NULL; + } + unbind_evtchn_from_irqhandler(info->evtchn, NULL); + info->evtchn = 0; } static void blkif_recover(struct blkfront_info *info) { - int i; - blkif_request_t *req; - struct blk_shadow *copy; - int j; - - /* Stage 1: Make a safe copy of the shadow state. */ - copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL); - BUG_ON(copy == NULL); - memcpy(copy, blk_shadow, sizeof(blk_shadow)); - - /* Stage 2: Set up free list. */ - memset(&blk_shadow, 0, sizeof(blk_shadow)); - for ( i = 0; i < BLK_RING_SIZE; i++ ) - blk_shadow[i].req.id = i+1; - blk_shadow_free = info->ring.req_prod_pvt; - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; - - /* Stage 3: Find pending requests and requeue them. */ - for ( i = 0; i < BLK_RING_SIZE; i++ ) - { - /* Not in use? */ - if ( copy[i].request == 0 ) - continue; - - /* Grab a request slot and unpickle shadow state into it. */ - req = RING_GET_REQUEST( - &info->ring, info->ring.req_prod_pvt); - unpickle_request(req, &copy[i]); - - /* We get a new request id, and must reset the shadow state. */ - req->id = GET_ID_FROM_FREELIST(); - memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i])); - - /* Rewrite any grant references invalidated by suspend/resume. */ - for ( j = 0; j < req->nr_segments; j++ ) - { - if ( req->frame_and_sects[j] & GRANTREF_INVALID ) - gnttab_grant_foreign_access_ref( - blkif_gref_from_fas(req->frame_and_sects[j]), - info->backend_id, - blk_shadow[req->id].frame[j], - rq_data_dir((struct request *) - blk_shadow[req->id].request)); - req->frame_and_sects[j] &= ~GRANTREF_INVALID; - } - blk_shadow[req->id].req = *req; - - info->ring.req_prod_pvt++; - } - - kfree(copy); - - recovery = 0; - - /* info->ring->req_prod will be set when we flush_requests().*/ - wmb(); - - /* Kicks things back into life. */ - flush_requests(info); - - /* Now safe to left other people use the interface. */ - info->connected = BLKIF_STATE_CONNECTED; + int i; + blkif_request_t *req; + struct blk_shadow *copy; + int j; + + /* Stage 1: Make a safe copy of the shadow state. */ + copy = (struct blk_shadow *)kmalloc(sizeof(info->shadow), GFP_KERNEL); + BUG_ON(copy == NULL); + memcpy(copy, info->shadow, sizeof(info->shadow)); + + /* Stage 2: Set up free list. */ + memset(&info->shadow, 0, sizeof(info->shadow)); + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow_free = info->ring.req_prod_pvt; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Stage 3: Find pending requests and requeue them. */ + for (i = 0; i < BLK_RING_SIZE; i++) { + /* Not in use? */ + if (copy[i].request == 0) + continue; + + /* Grab a request slot and unpickle shadow state into it. */ + req = RING_GET_REQUEST( + &info->ring, info->ring.req_prod_pvt); + unpickle_request(req, &copy[i]); + + /* We get a new request id, and must reset the shadow state. */ + req->id = GET_ID_FROM_FREELIST(info); + memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); + + /* Rewrite any grant references invalidated by susp/resume. */ + for (j = 0; j < req->nr_segments; j++) { + if ( req->frame_and_sects[j] & GRANTREF_INVALID ) + gnttab_grant_foreign_access_ref( + blkif_gref_from_fas( + req->frame_and_sects[j]), + info->backend_id, + info->shadow[req->id].frame[j], + rq_data_dir( + (struct request *) + info->shadow[req->id].request)); + req->frame_and_sects[j] &= ~GRANTREF_INVALID; + } + info->shadow[req->id].req = *req; + + info->ring.req_prod_pvt++; + } + + kfree(copy); + + recovery = 0; + + /* info->ring->req_prod will be set when we flush_requests().*/ + wmb(); + + /* Kicks things back into life. */ + flush_requests(info); + + /* Now safe to left other people use the interface. */ + info->connected = BLKIF_STATE_CONNECTED; } static void blkif_connect(struct blkfront_info *info, u16 evtchn) { - int err = 0; - - info->evtchn = evtchn; - - err = bind_evtchn_to_irqhandler( - info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); - if ( err != 0 ) - { - WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); - return; - } + int err = 0; + + info->evtchn = evtchn; + + err = bind_evtchn_to_irqhandler( + info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); + if (err != 0) { + WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); + return; + } } @@ -1227,9 +621,8 @@ static int blkfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - int err; + int err, vdevice, i; struct blkfront_info *info; - int vdevice; /* FIXME: Use dynamic device id if this is not set. */ err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice); @@ -1250,6 +643,12 @@ info->connected = BLKIF_STATE_DISCONNECTED; info->mi = NULL; INIT_WORK(&info->work, blkif_restart_queue, (void *)info); + + info->shadow_free = 0; + memset(info->shadow, 0, sizeof(info->shadow)); + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); @@ -1329,55 +728,57 @@ static int wait_for_blkif(void) { - int err = 0; - int i; - - /* - * We should figure out how many and which devices we need to - * proceed and only wait for those. For now, continue once the - * first device is around. - */ - for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - if ( blkif_state != BLKIF_STATE_CONNECTED ) - { - WPRINTK("Timeout connecting to device!\n"); - err = -ENOSYS; - } - return err; + int err = 0; + int i; + + /* + * We should figure out how many and which devices we need to + * proceed and only wait for those. For now, continue once the + * first device is around. + */ + for (i = 0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); + } + + if (blkif_state != BLKIF_STATE_CONNECTED) { + WPRINTK("Timeout connecting to device!\n"); + err = -ENOSYS; + } + return err; } static int __init xlblk_init(void) { - int i; - - if ( (xen_start_info.flags & SIF_INITDOMAIN) || - (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) - return 0; - - IPRINTK("Initialising virtual block device driver\n"); - - blk_shadow_free = 0; - memset(blk_shadow, 0, sizeof(blk_shadow)); - for ( i = 0; i < BLK_RING_SIZE; i++ ) - blk_shadow[i].req.id = i+1; - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; - - init_blk_xenbus(); - - wait_for_blkif(); - - return 0; -} + if ((xen_start_info.flags & SIF_INITDOMAIN) + || (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) + return 0; + + IPRINTK("Initialising virtual block device driver\n"); + + init_blk_xenbus(); + + wait_for_blkif(); + + return 0; +} + +module_init(xlblk_init); static void blkif_completion(struct blk_shadow *s) { - int i; - for ( i = 0; i < s->req.nr_segments; i++ ) - gnttab_free_grant_reference( - blkif_gref_from_fas(s->req.frame_and_sects[i])); -} + int i; + for (i = 0; i < s->req.nr_segments; i++) + gnttab_free_grant_reference( + blkif_gref_from_fas(s->req.frame_and_sects[i])); +} + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/drivers/xen/blkfront/block.h --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Mon Aug 29 20:19:24 2005 @@ -96,6 +96,14 @@ struct xlbd_type_info *type; }; +struct blk_shadow { + blkif_request_t req; + unsigned long request; + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) + /* * We have one of these per vbd, whether ide, scsi or 'other'. They * hang in private_data off the gendisk structure. We may end up @@ -116,11 +124,11 @@ blkif_front_ring_t ring; unsigned int evtchn; struct xlbd_major_info *mi; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) request_queue_t *rq; -#endif struct work_struct work; struct gnttab_free_callback callback; + struct blk_shadow shadow[BLK_RING_SIZE]; + unsigned long shadow_free; }; extern spinlock_t blkif_io_lock; diff -r dfaf788ab18c -r a698bd49931b linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Aug 26 20:47:16 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Aug 29 20:19:24 2005 @@ -1272,25 +1272,24 @@ static int netfront_suspend(struct xenbus_device *dev) { - struct net_private *np = dev->data; - /* Avoid having tx/rx stuff happen until we're ready. */ - unbind_evtchn_from_irqhandler(np->evtchn, np->netdev); - return 0; + struct netfront_info *info = dev->data; + + unregister_xenbus_watch(&info->watch); + kfree(info->backend); + info->backend = NULL; + + netif_free(info); + + return 0; } static int netfront_resume(struct xenbus_device *dev) { - struct net_private *np = dev->data; - /* - * Connect regardless of whether IFF_UP flag set. - * Stop bad things from happening until we're back up. - */ - np->backend_state = BEST_DISCONNECTED; - memset(np->tx, 0, PAGE_SIZE); - memset(np->rx, 0, PAGE_SIZE); - - // send_interface_connect(np); - return 0; + struct net_private *np = dev->data; + int err; + + err = talk_to_backend(dev, np); + return err; } static struct xenbus_driver netfront = { diff -r dfaf788ab18c -r a698bd49931b tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Fri Aug 26 20:47:16 2005 +++ b/tools/libxc/xc_linux_save.c Mon Aug 29 20:19:24 2005 @@ -763,8 +763,6 @@ batch++; } -// DPRINTF("batch %d:%d (n=%d)\n", iter, batch, n); - if ( batch == 0 ) goto skip; /* vanishingly unlikely... */ @@ -915,7 +913,7 @@ continue; } - if ( last_iter ) break; + if ( last_iter ) break; if ( live ) { diff -r dfaf788ab18c -r a698bd49931b tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Fri Aug 26 20:47:16 2005 +++ b/tools/python/xen/xend/XendCheckpoint.py Mon Aug 29 20:19:24 2005 @@ -51,7 +51,7 @@ p = select.poll() p.register(child.fromchild.fileno()) p.register(child.childerr.fileno()) - while True: + while True: r = p.poll() for (fd, event) in r: if not event & select.POLLIN: @@ -69,8 +69,9 @@ try: dominfo.db.releaseDomain(dominfo.id) except Exception, ex: - log.warning("error in domain release on xenstore: %s", - ex) + log.warning( + "error in domain release on xenstore: %s", + ex) pass dominfo.state_wait("suspended") log.info("suspend %d done" % dominfo.id) diff -r dfaf788ab18c -r a698bd49931b tools/python/xen/xend/server/SrvDaemon.py --- a/tools/python/xen/xend/server/SrvDaemon.py Fri Aug 26 20:47:16 2005 +++ b/tools/python/xen/xend/server/SrvDaemon.py Mon Aug 29 20:19:24 2005 @@ -42,7 +42,8 @@ self.traceon = 0 self.tracefile = None self.traceindent = 0 - + self.child = 0 + def daemon_pids(self): pids = [] pidex = '(?P<pid>\d+)' @@ -140,15 +141,12 @@ else: return 0 - def install_child_reaper(self): - #signal.signal(signal.SIGCHLD, self.onSIGCHLD) - # Ensure that zombie children are automatically reaped. - xu.autoreap() - def onSIGCHLD(self, signum, frame): - code = 1 - while code > 0: - code = os.waitpid(-1, os.WNOHANG) + if self.child > 0: + try: + pid, sts = os.waitpid(self.child, os.WNOHANG) + except os.error, ex: + pass def fork_pid(self, pidfile): """Fork and write the pid of the child to 'pidfile'. @@ -156,13 +154,16 @@ @param pidfile: pid file @return: pid of child in parent, 0 in child """ - pid = os.fork() - if pid: + + self.child = os.fork() + + if self.child: # Parent pidfile = open(pidfile, 'w') - pidfile.write(str(pid)) + pidfile.write(str(self.child)) pidfile.close() - return pid + + return self.child def daemonize(self): if not XEND_DAEMONIZE: return @@ -203,8 +204,7 @@ # Trying to run an already-running service is a success. return 0 - self.install_child_reaper() - + signal.signal(signal.SIGCHLD, self.onSIGCHLD) if self.fork_pid(XEND_PID_FILE): #Parent. Sleep to give child time to start. time.sleep(1) @@ -309,7 +309,7 @@ print >>sys.stderr, 'Exception starting xend:', ex if XEND_DEBUG: traceback.print_exc() - log.exception("Exception starting xend") + log.exception("Exception starting xend (%s)" % ex) self.exit(1) def createFactories(self): diff -r dfaf788ab18c -r a698bd49931b tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Fri Aug 26 20:47:16 2005 +++ b/tools/python/xen/xm/main.py Mon Aug 29 20:19:24 2005 @@ -715,9 +715,9 @@ err("Most commands need root access. Please try again as root") sys.exit(1) except XendError, ex: - if args[0] == "bogus": - args.remove("bogus") if len(args) > 0: + if args[0] == "bogus": + args.remove("bogus") handle_xend_error(argv[1], args[0], ex) else: print "Unexpected error:", sys.exc_info()[0] _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.