[Xen-changelog] [xen-unstable] arm: fixup hard tabs
# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxx>
# Date 1329153968 0
# Node ID 0ba87b95e80bae059fe70b4b117dcc409f2471ef
# Parent f3d137e3e6c79ea9c66e7b224c9041404e753149
arm: fixup hard tabs

Unfortunately the tool I was using to apply patches mangles hard tabs. This patch corrects this in the affected files (which is fortunately only a subset of .S files or files imported from Linux).

This commit fixes this error such that the tree represents the state it would have been in had I correctly committed what I was sent. "git diff" and "git diff -b" vs. Stefano's v6 branch now contain the same output -- i.e. only the intervening development.

Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
---

diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/dummy.S --- a/xen/arch/arm/dummy.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/dummy.S Mon Feb 13 17:26:08 2012 +0000 @@ -1,13 +1,13 @@ /* Nothing is mapped at 1G, for the moment */ #define DUMMY(x) \ - .globl x; \ -x: .word 0xe7f000f0 -/* x: mov r0, #0x40000000 ; str r0, [r0]; b x */ + .globl x; \ +x: .word 0xe7f000f0 +/* x: mov r0, #0x40000000 ; str r0, [r0]; b x */ #define NOP(x) \ - .globl x; \ -x: mov pc, lr - + .globl x; \ +x: mov pc, lr + DUMMY(alloc_pirq_struct); DUMMY(alloc_vcpu_guest_context); DUMMY(arch_do_domctl); diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/entry.S --- a/xen/arch/arm/entry.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/entry.S Mon Feb 13 17:26:08 2012 +0000 @@ -1,69 +1,69 @@ #include <xen/config.h> #include <asm/asm_defns.h> -#define SAVE_ONE_BANKED(reg) mrs r11, reg; str r11, [sp, #UREGS_##reg] -#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11 +#define SAVE_ONE_BANKED(reg) mrs r11, reg; str r11, [sp, #UREGS_##reg] +#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11 #define SAVE_BANKED(mode) \ - SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode) + SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode) #define RESTORE_BANKED(mode) \ - RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode) + RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode) -#define SAVE_ALL \ - sub sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ \ - push {r0-r12}; /* Save R0-R12 */ \ - \ - mrs r11, ELR_hyp; /* ELR_hyp is return address. */ \ - str r11, [sp, #UREGS_pc]; \ - \ - str lr, [sp, #UREGS_lr]; \ - \ - add r11, sp, #UREGS_kernel_sizeof+4; \ - str r11, [sp, #UREGS_sp]; \ - \ - mrs r11, SPSR_hyp; \ - str r11, [sp, #UREGS_cpsr]; \ - and r11, #PSR_MODE_MASK; \ - cmp r11, #PSR_MODE_HYP; \ - blne save_guest_regs +#define SAVE_ALL \ + sub sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ \ + push {r0-r12}; /* Save R0-R12 */ \ + \ + mrs r11, ELR_hyp; /* ELR_hyp is return address. */ \ + str r11, [sp, #UREGS_pc]; \ + \ + str lr, [sp, #UREGS_lr]; \ + \ + add r11, sp, #UREGS_kernel_sizeof+4; \ + str r11, [sp, #UREGS_sp]; \ + \ + mrs r11, SPSR_hyp; \ + str r11, [sp, #UREGS_cpsr]; \ + and r11, #PSR_MODE_MASK; \ + cmp r11, #PSR_MODE_HYP; \ + blne save_guest_regs save_guest_regs: - ldr r11, [sp, #UREGS_lr] - str r11, [sp, #UREGS_LR_usr] - ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. 
*/ - str r11, [sp, #UREGS_sp] - SAVE_ONE_BANKED(SP_usr) - SAVE_BANKED(svc) - SAVE_BANKED(abt) - SAVE_BANKED(und) - SAVE_BANKED(irq) - SAVE_BANKED(fiq) - SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq) - SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq); - mov pc, lr + ldr r11, [sp, #UREGS_lr] + str r11, [sp, #UREGS_LR_usr] + ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. */ + str r11, [sp, #UREGS_sp] + SAVE_ONE_BANKED(SP_usr) + SAVE_BANKED(svc) + SAVE_BANKED(abt) + SAVE_BANKED(und) + SAVE_BANKED(irq) + SAVE_BANKED(fiq) + SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq) + SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq); + mov pc, lr -#define DEFINE_TRAP_ENTRY(trap) \ - ALIGN; \ -trap_##trap: \ - SAVE_ALL; \ - adr lr, return_from_trap; \ - mov r0, sp; \ - mov r11, sp; \ - bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ - b do_trap_##trap +#define DEFINE_TRAP_ENTRY(trap) \ + ALIGN; \ +trap_##trap: \ + SAVE_ALL; \ + adr lr, return_from_trap; \ + mov r0, sp; \ + mov r11, sp; \ + bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ + b do_trap_##trap .globl hyp_traps_vector - .align 5 + .align 5 hyp_traps_vector: - .word 0 /* 0x00 - Reset */ - b trap_undefined_instruction /* 0x04 - Undefined Instruction */ - b trap_supervisor_call /* 0x08 - Supervisor Call */ - b trap_prefetch_abort /* 0x0c - Prefetch Abort */ - b trap_data_abort /* 0x10 - Data Abort */ - b trap_hypervisor /* 0x14 - Hypervisor */ - b trap_irq /* 0x18 - IRQ */ - b trap_fiq /* 0x1c - FIQ */ + .word 0 /* 0x00 - Reset */ + b trap_undefined_instruction /* 0x04 - Undefined Instruction */ + b trap_supervisor_call /* 0x08 - Supervisor Call */ + b trap_prefetch_abort /* 0x0c - Prefetch Abort */ + b trap_data_abort /* 0x10 - Data Abort */ + b trap_hypervisor /* 0x14 - Hypervisor */ + b trap_irq /* 0x18 - IRQ */ + b trap_fiq /* 0x1c - FIQ */ DEFINE_TRAP_ENTRY(undefined_instruction) DEFINE_TRAP_ENTRY(supervisor_call) @@ -74,34 +74,34 @@ DEFINE_TRAP_ENTRY(fiq) ENTRY(return_from_trap) - ldr r11, [sp, #UREGS_cpsr] - and r11, #PSR_MODE_MASK - cmp r11, #PSR_MODE_HYP - beq return_to_hypervisor + ldr r11, [sp, #UREGS_cpsr] + and r11, #PSR_MODE_MASK + cmp r11, #PSR_MODE_HYP + beq return_to_hypervisor ENTRY(return_to_guest) - mov r11, sp - bic sp, #7 /* Align the stack pointer */ - bl leave_hypervisor_tail - ldr r11, [sp, #UREGS_pc] - msr ELR_hyp, r11 - ldr r11, [sp, #UREGS_cpsr] - msr SPSR_hyp, r11 - RESTORE_ONE_BANKED(SP_usr) - RESTORE_BANKED(svc) - RESTORE_BANKED(abt) - RESTORE_BANKED(und) - RESTORE_BANKED(irq) - RESTORE_BANKED(fiq) - RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq) - RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq); - ldr lr, [sp, #UREGS_LR_usr] - pop {r0-r12} - add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ - eret + mov r11, sp + bic sp, #7 /* Align the stack pointer */ + bl leave_hypervisor_tail + ldr r11, [sp, #UREGS_pc] + msr ELR_hyp, r11 + ldr r11, [sp, #UREGS_cpsr] + msr SPSR_hyp, r11 + RESTORE_ONE_BANKED(SP_usr) + RESTORE_BANKED(svc) + RESTORE_BANKED(abt) + RESTORE_BANKED(und) + RESTORE_BANKED(irq) + RESTORE_BANKED(fiq) + RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq) + RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq); + ldr lr, [sp, #UREGS_LR_usr] + pop {r0-r12} + add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ + eret ENTRY(return_to_hypervisor) - ldr lr, [sp, #UREGS_lr] - pop {r0-r12} - add sp, 
#(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ - eret + ldr lr, [sp, #UREGS_lr] + pop {r0-r12} + add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ + eret diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/head.S --- a/xen/arch/arm/head.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/head.S Mon Feb 13 17:26:08 2012 +0000 @@ -26,281 +26,281 @@ * Clobbers r0-r3. */ #ifdef EARLY_UART_ADDRESS #define PRINT(_s) \ - adr r0, 98f ; \ - bl puts ; \ - b 99f ; \ -98: .asciz _s ; \ - .align 2 ; \ + adr r0, 98f ; \ + bl puts ; \ + b 99f ; \ +98: .asciz _s ; \ + .align 2 ; \ 99: #else #define PRINT(s) #endif - .arm + .arm - /* This must be the very first address in the loaded image. - * It should be linked at XEN_VIRT_START, and loaded at any - * 2MB-aligned address. All of text+data+bss must fit in 2MB, - * or the initial pagetable code below will need adjustment. */ - .global start + /* This must be the very first address in the loaded image. + * It should be linked at XEN_VIRT_START, and loaded at any + * 2MB-aligned address. All of text+data+bss must fit in 2MB, + * or the initial pagetable code below will need adjustment. */ + .global start start: - cpsid aif /* Disable all interrupts */ + cpsid aif /* Disable all interrupts */ - /* Save the bootloader arguments in less-clobberable registers */ - mov r7, r1 /* r7 := ARM-linux machine type */ - mov r8, r2 /* r8 := ATAG base address */ + /* Save the bootloader arguments in less-clobberable registers */ + mov r7, r1 /* r7 := ARM-linux machine type */ + mov r8, r2 /* r8 := ATAG base address */ - /* Find out where we are */ - ldr r0, =start - adr r9, start /* r9 := paddr (start) */ - sub r10, r9, r0 /* r10 := phys-offset */ + /* Find out where we are */ + ldr r0, =start + adr r9, start /* r9 := paddr (start) */ + sub r10, r9, r0 /* r10 := phys-offset */ - /* Using the DTB in the .dtb section? */ + /* Using the DTB in the .dtb section? */ #ifdef CONFIG_DTB_FILE - ldr r8, =_sdtb - add r8, r10 /* r8 := paddr(DTB) */ + ldr r8, =_sdtb + add r8, r10 /* r8 := paddr(DTB) */ #endif #ifdef EARLY_UART_ADDRESS - /* Say hello */ - ldr r11, =EARLY_UART_ADDRESS /* r11 := UART base address */ - bl init_uart + /* Say hello */ + ldr r11, =EARLY_UART_ADDRESS /* r11 := UART base address */ + bl init_uart #endif - /* Check that this CPU has Hyp mode */ - mrc CP32(r0, ID_PFR1) - and r0, r0, #0xf000 /* Bits 12-15 define virt extensions */ - teq r0, #0x1000 /* Must == 0x1 or may be incompatible */ - beq 1f - bl putn - PRINT("- CPU doesn't support the virtualization extensions -\r\n") - b fail + /* Check that this CPU has Hyp mode */ + mrc CP32(r0, ID_PFR1) + and r0, r0, #0xf000 /* Bits 12-15 define virt extensions */ + teq r0, #0x1000 /* Must == 0x1 or may be incompatible */ + beq 1f + bl putn + PRINT("- CPU doesn't support the virtualization extensions -\r\n") + b fail 1: - /* Check if we're already in it */ - mrs r0, cpsr - and r0, r0, #0x1f /* Mode is in the low 5 bits of CPSR */ - teq r0, #0x1a /* Hyp Mode? */ - bne 1f - PRINT("- Started in Hyp mode -\r\n") - b hyp + /* Check if we're already in it */ + mrs r0, cpsr + and r0, r0, #0x1f /* Mode is in the low 5 bits of CPSR */ + teq r0, #0x1a /* Hyp Mode? */ + bne 1f + PRINT("- Started in Hyp mode -\r\n") + b hyp 1: - /* Otherwise, it must have been Secure Supervisor mode */ - mrc CP32(r0, SCR) - tst r0, #0x1 /* Not-Secure bit set? 
*/ - beq 1f - PRINT("- CPU is not in Hyp mode or Secure state -\r\n") - b fail + /* Otherwise, it must have been Secure Supervisor mode */ + mrc CP32(r0, SCR) + tst r0, #0x1 /* Not-Secure bit set? */ + beq 1f + PRINT("- CPU is not in Hyp mode or Secure state -\r\n") + b fail 1: - /* OK, we're in Secure state. */ - PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n") + /* OK, we're in Secure state. */ + PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n") - /* Dance into Hyp mode */ - cpsid aif, #0x16 /* Enter Monitor mode */ - mrc CP32(r0, SCR) - orr r0, r0, #0x100 /* Set HCE */ - orr r0, r0, #0xb1 /* Set SCD, AW, FW and NS */ - bic r0, r0, #0xe /* Clear EA, FIQ and IRQ */ - mcr CP32(r0, SCR) - /* Ugly: the system timer's frequency register is only - * programmable in Secure state. Since we don't know where its - * memory-mapped control registers live, we can't find out the - * right frequency. Use the VE model's default frequency here. */ - ldr r0, =0x5f5e100 /* 100 MHz */ - mcr CP32(r0, CNTFRQ) - ldr r0, =0x40c00 /* SMP, c11, c10 in non-secure mode */ - mcr CP32(r0, NSACR) - /* Continuing ugliness: Set up the GIC so NS state owns interrupts */ - mov r0, #GIC_BASE_ADDRESS - add r0, r0, #GIC_DR_OFFSET - mov r1, #0 - str r1, [r0] /* Disable delivery in the distributor */ - add r0, r0, #0x80 /* GICD_IGROUP0 */ - mov r2, #0xffffffff /* All interrupts to group 1 */ - str r2, [r0] - str r2, [r0, #4] - str r2, [r0, #8] - /* Must drop priority mask below 0x80 before entering NS state */ - mov r0, #GIC_BASE_ADDRESS - add r0, r0, #GIC_CR_OFFSET - ldr r1, =0xff - str r1, [r0, #0x4] /* -> GICC_PMR */ - /* Reset a few config registers */ - mov r0, #0 - mcr CP32(r0, FCSEIDR) - mcr CP32(r0, CONTEXTIDR) - /* FIXME: ought to reset some other NS control regs here */ - adr r1, 1f - adr r0, hyp /* Store paddr (hyp entry point) */ - str r0, [r1] /* where we can use it for RFE */ - isb /* Ensure we see the stored target address */ - rfeia r1 /* Enter Hyp mode */ + /* Dance into Hyp mode */ + cpsid aif, #0x16 /* Enter Monitor mode */ + mrc CP32(r0, SCR) + orr r0, r0, #0x100 /* Set HCE */ + orr r0, r0, #0xb1 /* Set SCD, AW, FW and NS */ + bic r0, r0, #0xe /* Clear EA, FIQ and IRQ */ + mcr CP32(r0, SCR) + /* Ugly: the system timer's frequency register is only + * programmable in Secure state. Since we don't know where its + * memory-mapped control registers live, we can't find out the + * right frequency. Use the VE model's default frequency here. 
*/ + ldr r0, =0x5f5e100 /* 100 MHz */ + mcr CP32(r0, CNTFRQ) + ldr r0, =0x40c00 /* SMP, c11, c10 in non-secure mode */ + mcr CP32(r0, NSACR) + /* Continuing ugliness: Set up the GIC so NS state owns interrupts */ + mov r0, #GIC_BASE_ADDRESS + add r0, r0, #GIC_DR_OFFSET + mov r1, #0 + str r1, [r0] /* Disable delivery in the distributor */ + add r0, r0, #0x80 /* GICD_IGROUP0 */ + mov r2, #0xffffffff /* All interrupts to group 1 */ + str r2, [r0] + str r2, [r0, #4] + str r2, [r0, #8] + /* Must drop priority mask below 0x80 before entering NS state */ + mov r0, #GIC_BASE_ADDRESS + add r0, r0, #GIC_CR_OFFSET + ldr r1, =0xff + str r1, [r0, #0x4] /* -> GICC_PMR */ + /* Reset a few config registers */ + mov r0, #0 + mcr CP32(r0, FCSEIDR) + mcr CP32(r0, CONTEXTIDR) + /* FIXME: ought to reset some other NS control regs here */ + adr r1, 1f + adr r0, hyp /* Store paddr (hyp entry point) */ + str r0, [r1] /* where we can use it for RFE */ + isb /* Ensure we see the stored target address */ + rfeia r1 /* Enter Hyp mode */ -1: .word 0 /* PC to enter Hyp mode at */ - .word 0x000001da /* CPSR: LE, Abort/IRQ/FIQ off, Hyp */ +1: .word 0 /* PC to enter Hyp mode at */ + .word 0x000001da /* CPSR: LE, Abort/IRQ/FIQ off, Hyp */ hyp: - PRINT("- Setting up control registers -\r\n") + PRINT("- Setting up control registers -\r\n") - /* Set up memory attribute type tables */ - ldr r0, =MAIR0VAL - ldr r1, =MAIR1VAL - mcr CP32(r0, MAIR0) - mcr CP32(r1, MAIR1) - mcr CP32(r0, HMAIR0) - mcr CP32(r1, HMAIR1) + /* Set up memory attribute type tables */ + ldr r0, =MAIR0VAL + ldr r1, =MAIR1VAL + mcr CP32(r0, MAIR0) + mcr CP32(r1, MAIR1) + mcr CP32(r0, HMAIR0) + mcr CP32(r1, HMAIR1) - /* Set up the HTCR: - * PT walks use Outer-Shareable accesses, - * PT walks are write-back, no-write-allocate in both cache levels, - * Full 32-bit address space goes through this table. */ - ldr r0, =0x80002500 - mcr CP32(r0, HTCR) + /* Set up the HTCR: + * PT walks use Outer-Shareable accesses, + * PT walks are write-back, no-write-allocate in both cache levels, + * Full 32-bit address space goes through this table. */ + ldr r0, =0x80002500 + mcr CP32(r0, HTCR) - /* Set up the HSCTLR: - * Exceptions in LE ARM, - * Low-latency IRQs disabled, - * Write-implies-XN disabled (for now), - * I-cache and d-cache enabled, - * Alignment checking enabled, - * MMU translation disabled (for now). */ - ldr r0, =(HSCTLR_BASE|SCTLR_A|SCTLR_C) - mcr CP32(r0, HSCTLR) + /* Set up the HSCTLR: + * Exceptions in LE ARM, + * Low-latency IRQs disabled, + * Write-implies-XN disabled (for now), + * I-cache and d-cache enabled, + * Alignment checking enabled, + * MMU translation disabled (for now). 
*/ + ldr r0, =(HSCTLR_BASE|SCTLR_A|SCTLR_C) + mcr CP32(r0, HSCTLR) - /* Write Xen's PT's paddr into the HTTBR */ - ldr r4, =xen_pgtable - add r4, r4, r10 /* r4 := paddr (xen_pagetable) */ - mov r5, #0 /* r4:r5 is paddr (xen_pagetable) */ - mcrr CP64(r4, r5, HTTBR) + /* Write Xen's PT's paddr into the HTTBR */ + ldr r4, =xen_pgtable + add r4, r4, r10 /* r4 := paddr (xen_pagetable) */ + mov r5, #0 /* r4:r5 is paddr (xen_pagetable) */ + mcrr CP64(r4, r5, HTTBR) - /* Build the baseline idle pagetable's first-level entries */ - ldr r1, =xen_second - add r1, r1, r10 /* r1 := paddr (xen_second) */ - mov r3, #0x0 - orr r2, r1, #0xe00 /* r2:r3 := table map of xen_second */ - orr r2, r2, #0x07f /* (+ rights for linear PT) */ - strd r2, r3, [r4, #0] /* Map it in slot 0 */ - add r2, r2, #0x1000 - strd r2, r3, [r4, #8] /* Map 2nd page in slot 1 */ - add r2, r2, #0x1000 - strd r2, r3, [r4, #16] /* Map 3rd page in slot 2 */ - add r2, r2, #0x1000 - strd r2, r3, [r4, #24] /* Map 4th page in slot 3 */ + /* Build the baseline idle pagetable's first-level entries */ + ldr r1, =xen_second + add r1, r1, r10 /* r1 := paddr (xen_second) */ + mov r3, #0x0 + orr r2, r1, #0xe00 /* r2:r3 := table map of xen_second */ + orr r2, r2, #0x07f /* (+ rights for linear PT) */ + strd r2, r3, [r4, #0] /* Map it in slot 0 */ + add r2, r2, #0x1000 + strd r2, r3, [r4, #8] /* Map 2nd page in slot 1 */ + add r2, r2, #0x1000 + strd r2, r3, [r4, #16] /* Map 3rd page in slot 2 */ + add r2, r2, #0x1000 + strd r2, r3, [r4, #24] /* Map 4th page in slot 3 */ - /* Now set up the second-level entries */ - orr r2, r9, #0xe00 - orr r2, r2, #0x07d /* r2:r3 := 2MB normal map of Xen */ - mov r4, r9, lsr #18 /* Slot for paddr(start) */ - strd r2, r3, [r1, r4] /* Map Xen there */ - ldr r4, =start - lsr r4, #18 /* Slot for vaddr(start) */ - strd r2, r3, [r1, r4] /* Map Xen there too */ + /* Now set up the second-level entries */ + orr r2, r9, #0xe00 + orr r2, r2, #0x07d /* r2:r3 := 2MB normal map of Xen */ + mov r4, r9, lsr #18 /* Slot for paddr(start) */ + strd r2, r3, [r1, r4] /* Map Xen there */ + ldr r4, =start + lsr r4, #18 /* Slot for vaddr(start) */ + strd r2, r3, [r1, r4] /* Map Xen there too */ #ifdef EARLY_UART_ADDRESS - ldr r3, =(1<<(54-32)) /* NS for device mapping */ - lsr r2, r11, #21 - lsl r2, r2, #21 /* 2MB-aligned paddr of UART */ - orr r2, r2, #0xe00 - orr r2, r2, #0x071 /* r2:r3 := 2MB dev map including UART */ - add r4, r4, #8 - strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */ + ldr r3, =(1<<(54-32)) /* NS for device mapping */ + lsr r2, r11, #21 + lsl r2, r2, #21 /* 2MB-aligned paddr of UART */ + orr r2, r2, #0xe00 + orr r2, r2, #0x071 /* r2:r3 := 2MB dev map including UART */ + add r4, r4, #8 + strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */ #else - add r4, r4, #8 /* Skip over unused fixmap slot */ + add r4, r4, #8 /* Skip over unused fixmap slot */ #endif - mov r3, #0x0 - lsr r2, r8, #21 - lsl r2, r2, #21 /* 2MB-aligned paddr of DTB */ - orr r2, r2, #0xf00 - orr r2, r2, #0x07d /* r2:r3 := 2MB RAM incl. DTB */ - add r4, r4, #8 - strd r2, r3, [r1, r4] /* Map it in the early boot slot */ + mov r3, #0x0 + lsr r2, r8, #21 + lsl r2, r2, #21 /* 2MB-aligned paddr of DTB */ + orr r2, r2, #0xf00 + orr r2, r2, #0x07d /* r2:r3 := 2MB RAM incl. 
DTB */ + add r4, r4, #8 + strd r2, r3, [r1, r4] /* Map it in the early boot slot */ - PRINT("- Turning on paging -\r\n") + PRINT("- Turning on paging -\r\n") - ldr r1, =paging /* Explicit vaddr, not RIP-relative */ - mrc CP32(r0, HSCTLR) - orr r0, r0, #0x1 /* Add in the MMU enable bit */ - dsb /* Flush PTE writes and finish reads */ - mcr CP32(r0, HSCTLR) /* now paging is enabled */ - isb /* Now, flush the icache */ - mov pc, r1 /* Get a proper vaddr into PC */ + ldr r1, =paging /* Explicit vaddr, not RIP-relative */ + mrc CP32(r0, HSCTLR) + orr r0, r0, #0x1 /* Add in the MMU enable bit */ + dsb /* Flush PTE writes and finish reads */ + mcr CP32(r0, HSCTLR) /* now paging is enabled */ + isb /* Now, flush the icache */ + mov pc, r1 /* Get a proper vaddr into PC */ paging: #ifdef EARLY_UART_ADDRESS - /* Recover the UART address in the new address space */ - lsl r11, #11 - lsr r11, #11 /* UART base's offset from 2MB base */ - adr r0, start - add r0, r0, #0x200000 /* vaddr of the fixmap's 2MB slot */ - add r11, r11, r0 /* r11 := vaddr (UART base address) */ + /* Recover the UART address in the new address space */ + lsl r11, #11 + lsr r11, #11 /* UART base's offset from 2MB base */ + adr r0, start + add r0, r0, #0x200000 /* vaddr of the fixmap's 2MB slot */ + add r11, r11, r0 /* r11 := vaddr (UART base address) */ #endif - PRINT("- Entering C -\r\n") + PRINT("- Entering C -\r\n") - ldr sp, =init_stack /* Supply a stack */ - add sp, #STACK_SIZE /* (which grows down from the top). */ - sub sp, #CPUINFO_sizeof /* Make room for CPU save record */ - mov r0, r10 /* Marshal args: - phys_offset */ - mov r1, r7 /* - machine type */ - mov r2, r8 /* - ATAG address */ - b start_xen /* and disappear into the land of C */ + ldr sp, =init_stack /* Supply a stack */ + add sp, #STACK_SIZE /* (which grows down from the top). */ + sub sp, #CPUINFO_sizeof /* Make room for CPU save record */ + mov r0, r10 /* Marshal args: - phys_offset */ + mov r1, r7 /* - machine type */ + mov r2, r8 /* - ATAG address */ + b start_xen /* and disappear into the land of C */ /* Fail-stop * r0: string explaining why */ -fail: PRINT("- Boot failed -\r\n") -1: wfe - b 1b +fail: PRINT("- Boot failed -\r\n") +1: wfe + b 1b #ifdef EARLY_UART_ADDRESS /* Bring up the UART. Specific to the PL011 UART. * Clobbers r0-r2 */ init_uart: - mov r1, #0x0 - str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor fraction) */ - mov r1, #0x4 /* 7.3728MHz / 0x4 == 16 * 115200 */ - str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor integer) */ - mov r1, #0x60 /* 8n1 */ - str r1, [r11, #0x24] /* -> UARTLCR_H (Line control) */ - ldr r1, =0x00000301 /* RXE | TXE | UARTEN */ - str r1, [r11, #0x30] /* -> UARTCR (Control Register) */ - adr r0, 1f - b puts -1: .asciz "- UART enabled -\r\n" - .align 4 + mov r1, #0x0 + str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor fraction) */ + mov r1, #0x4 /* 7.3728MHz / 0x4 == 16 * 115200 */ + str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor integer) */ + mov r1, #0x60 /* 8n1 */ + str r1, [r11, #0x24] /* -> UARTLCR_H (Line control) */ + ldr r1, =0x00000301 /* RXE | TXE | UARTEN */ + str r1, [r11, #0x30] /* -> UARTCR (Control Register) */ + adr r0, 1f + b puts +1: .asciz "- UART enabled -\r\n" + .align 4 /* Print early debug messages. Specific to the PL011 UART. * r0: Nul-terminated string to print. 
* Clobbers r0-r2 */ puts: - ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ - tst r2, #0x8 /* Check BUSY bit */ - bne puts /* Wait for the UART to be ready */ - ldrb r2, [r0], #1 /* Load next char */ - teq r2, #0 /* Exit on nul*/ - moveq pc, lr - str r2, [r11] /* -> UARTDR (Data Register) */ - b puts + ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ + tst r2, #0x8 /* Check BUSY bit */ + bne puts /* Wait for the UART to be ready */ + ldrb r2, [r0], #1 /* Load next char */ + teq r2, #0 /* Exit on nul*/ + moveq pc, lr + str r2, [r11] /* -> UARTDR (Data Register) */ + b puts /* Print a 32-bit number in hex. Specific to the PL011 UART. * r0: Number to print. * clobbers r0-r3 */ putn: - adr r1, hex - mov r3, #8 -1: ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ - tst r2, #0x8 /* Check BUSY bit */ - bne 1b /* Wait for the UART to be ready */ - and r2, r0, #0xf0000000 /* Mask off the top nybble */ - ldrb r2, [r1, r2, lsr #28] /* Convert to a char */ - str r2, [r11] /* -> UARTDR (Data Register) */ - lsl r0, #4 /* Roll it through one nybble at a time */ - subs r3, r3, #1 - bne 1b - adr r0, crlf /* Finish with a newline */ - b puts + adr r1, hex + mov r3, #8 +1: ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ + tst r2, #0x8 /* Check BUSY bit */ + bne 1b /* Wait for the UART to be ready */ + and r2, r0, #0xf0000000 /* Mask off the top nybble */ + ldrb r2, [r1, r2, lsr #28] /* Convert to a char */ + str r2, [r11] /* -> UARTDR (Data Register) */ + lsl r0, #4 /* Roll it through one nybble at a time */ + subs r3, r3, #1 + bne 1b + adr r0, crlf /* Finish with a newline */ + b puts -crlf: .asciz "\r\n" -hex: .ascii "0123456789abcdef" - .align 2 +crlf: .asciz "\r\n" +hex: .ascii "0123456789abcdef" + .align 2 #else /* EARLY_UART_ADDRESS */ @@ -308,6 +308,6 @@ .global early_puts early_puts: puts: -putn: mov pc, lr +putn: mov pc, lr #endif /* EARLY_UART_ADDRESS */ diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/bitops.h --- a/xen/arch/arm/lib/bitops.h Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/bitops.h Mon Feb 13 17:26:08 2012 +0000 @@ -1,61 +1,61 @@ #include <xen/config.h> #if __LINUX_ARM_ARCH__ >= 6 - .macro bitop, instr - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - mov r2, #1 - and r3, r0, #31 @ Get bit offset - mov r0, r0, lsr #5 - add r1, r1, r0, lsl #2 @ Get word offset - mov r3, r2, lsl r3 -1: ldrex r2, [r1] - \instr r2, r2, r3 - strex r0, r2, [r1] - cmp r0, #0 - bne 1b - bx lr - .endm + .macro bitop, instr + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 +1: ldrex r2, [r1] + \instr r2, r2, r3 + strex r0, r2, [r1] + cmp r0, #0 + bne 1b + bx lr + .endm - .macro testop, instr, store - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - mov r2, #1 - and r3, r0, #31 @ Get bit offset - mov r0, r0, lsr #5 - add r1, r1, r0, lsl #2 @ Get word offset - mov r3, r2, lsl r3 @ create mask - smp_dmb -1: ldrex r2, [r1] - ands r0, r2, r3 @ save old value of bit - \instr r2, r2, r3 @ toggle bit - strex ip, r2, [r1] - cmp ip, #0 - bne 1b - smp_dmb - cmp r0, #0 - movne r0, #1 -2: bx lr - .endm + .macro testop, instr, store + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 @ create mask + smp_dmb +1: ldrex r2, [r1] + ands r0, r2, r3 @ save old value of bit + \instr r2, r2, r3 @ toggle bit 
+ strex ip, r2, [r1] + cmp ip, #0 + bne 1b + smp_dmb + cmp r0, #0 + movne r0, #1 +2: bx lr + .endm #else - .macro bitop, name, instr -ENTRY( \name ) -UNWIND( .fnstart ) - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - and r2, r0, #31 - mov r0, r0, lsr #5 - mov r3, #1 - mov r3, r3, lsl r2 - save_and_disable_irqs ip - ldr r2, [r1, r0, lsl #2] - \instr r2, r2, r3 - str r2, [r1, r0, lsl #2] - restore_irqs ip - mov pc, lr -UNWIND( .fnend ) -ENDPROC(\name ) - .endm + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r2, r0, #31 + mov r0, r0, lsr #5 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2] + \instr r2, r2, r3 + str r2, [r1, r0, lsl #2] + restore_irqs ip + mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm /** * testop - implement a test_and_xxx_bit operation. @@ -65,23 +65,23 @@ * Note: we can trivially conditionalise the store instruction * to avoid dirtying the data cache. */ - .macro testop, name, instr, store -ENTRY( \name ) -UNWIND( .fnstart ) - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - and r3, r0, #31 - mov r0, r0, lsr #5 - save_and_disable_irqs ip - ldr r2, [r1, r0, lsl #2]! - mov r0, #1 - tst r2, r0, lsl r3 - \instr r2, r2, r0, lsl r3 - \store r2, [r1] - moveq r0, #0 - restore_irqs ip - mov pc, lr -UNWIND( .fnend ) -ENDPROC(\name ) - .endm + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r3, r0, #31 + mov r0, r0, lsr #5 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2]! + mov r0, #1 + tst r2, r0, lsl r3 + \instr r2, r2, r0, lsl r3 + \store r2, [r1] + moveq r0, #0 + restore_irqs ip + mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm #endif diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/changebit.S --- a/xen/arch/arm/lib/changebit.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/changebit.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,5 +14,5 @@ .text ENTRY(_change_bit) - bitop eor + bitop eor ENDPROC(_change_bit) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/clearbit.S --- a/xen/arch/arm/lib/clearbit.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/clearbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -15,5 +15,5 @@ .text ENTRY(_clear_bit) - bitop bic + bitop bic ENDPROC(_clear_bit) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/copy_template.S --- a/xen/arch/arm/lib/copy_template.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/copy_template.S Mon Feb 13 17:26:08 2012 +0000 @@ -3,9 +3,9 @@ * * Code template for optimized memory copy functions * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -24,227 +24,227 @@ * * ldr1w ptr reg abort * - * This loads one word from 'ptr', stores it in 'reg' and increments - * 'ptr' to the next word. The 'abort' argument is used for fixup tables. + * This loads one word from 'ptr', stores it in 'reg' and increments + * 'ptr' to the next word. The 'abort' argument is used for fixup tables. 
* * ldr4w ptr reg1 reg2 reg3 reg4 abort * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort * - * This loads four or eight words starting from 'ptr', stores them - * in provided registers and increments 'ptr' past those words. - * The'abort' argument is used for fixup tables. + * This loads four or eight words starting from 'ptr', stores them + * in provided registers and increments 'ptr' past those words. + * The'abort' argument is used for fixup tables. * * ldr1b ptr reg cond abort * - * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. - * It also must apply the condition code if provided, otherwise the - * "al" condition is assumed by default. + * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. + * It also must apply the condition code if provided, otherwise the + * "al" condition is assumed by default. * * str1w ptr reg abort * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort * str1b ptr reg cond abort * - * Same as their ldr* counterparts, but data is stored to 'ptr' location - * rather than being loaded. + * Same as their ldr* counterparts, but data is stored to 'ptr' location + * rather than being loaded. * * enter reg1 reg2 * - * Preserve the provided registers on the stack plus any additional - * data as needed by the implementation including this code. Called - * upon code entry. + * Preserve the provided registers on the stack plus any additional + * data as needed by the implementation including this code. Called + * upon code entry. * * exit reg1 reg2 * - * Restore registers with the values previously saved with the - * 'preserv' macro. Called upon code termination. + * Restore registers with the values previously saved with the + * 'preserv' macro. Called upon code termination. * * LDR1W_SHIFT * STR1W_SHIFT * - * Correction to be applied to the "ip" register when branching into - * the ldr1w or str1w instructions (some of these macros may expand to - * than one 32bit instruction in Thumb-2) + * Correction to be applied to the "ip" register when branching into + * the ldr1w or str1w instructions (some of these macros may expand to + * than one 32bit instruction in Thumb-2) */ - enter r4, lr + enter r4, lr - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #0] ) - bne 9f - ands ip, r1, #3 - bne 10f + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #0] ) + bne 9f + ands ip, r1, #3 + bne 10f -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f - CALGN( ands ip, r0, #31 ) - CALGN( rsb r3, ip, #32 ) - CALGN( sbcnes r4, r3, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, r3 ) @ C gets set - CALGN( add pc, r4, ip ) + CALGN( ands ip, r0, #31 ) + CALGN( rsb r3, ip, #32 ) + CALGN( sbcnes r4, r3, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, r3 ) @ C gets set + CALGN( add pc, r4, ip ) - PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) + PLD( pld [r1, #0] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 4f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) -3: PLD( pld [r1, #124] ) -4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - subs r2, r2, #32 - str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) +3: PLD( pld [r1, #124] ) +4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + subs r2, r2, #32 + str8w r0, r3, r4, r5, r6, 
r7, r8, ip, lr, abort=20f + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) -5: ands ip, r2, #28 - rsb ip, ip, #32 +5: ands ip, r2, #28 + rsb ip, ip, #32 #if LDR1W_SHIFT > 0 - lsl ip, ip, #LDR1W_SHIFT + lsl ip, ip, #LDR1W_SHIFT #endif - addne pc, pc, ip @ C is always clear here - b 7f + addne pc, pc, ip @ C is always clear here + b 7f 6: - .rept (1 << LDR1W_SHIFT) - W(nop) - .endr - ldr1w r1, r3, abort=20f - ldr1w r1, r4, abort=20f - ldr1w r1, r5, abort=20f - ldr1w r1, r6, abort=20f - ldr1w r1, r7, abort=20f - ldr1w r1, r8, abort=20f - ldr1w r1, lr, abort=20f + .rept (1 << LDR1W_SHIFT) + W(nop) + .endr + ldr1w r1, r3, abort=20f + ldr1w r1, r4, abort=20f + ldr1w r1, r5, abort=20f + ldr1w r1, r6, abort=20f + ldr1w r1, r7, abort=20f + ldr1w r1, r8, abort=20f + ldr1w r1, lr, abort=20f #if LDR1W_SHIFT < STR1W_SHIFT - lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT + lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT #elif LDR1W_SHIFT > STR1W_SHIFT - lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT + lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT #endif - add pc, pc, ip - nop - .rept (1 << STR1W_SHIFT) - W(nop) - .endr - str1w r0, r3, abort=20f - str1w r0, r4, abort=20f - str1w r0, r5, abort=20f - str1w r0, r6, abort=20f - str1w r0, r7, abort=20f - str1w r0, r8, abort=20f - str1w r0, lr, abort=20f + add pc, pc, ip + nop + .rept (1 << STR1W_SHIFT) + W(nop) + .endr + str1w r0, r3, abort=20f + str1w r0, r4, abort=20f + str1w r0, r5, abort=20f + str1w r0, r6, abort=20f + str1w r0, r7, abort=20f + str1w r0, r8, abort=20f + str1w r0, lr, abort=20f - CALGN( bcs 2b ) + CALGN( bcs 2b ) -7: ldmfd sp!, {r5 - r8} +7: ldmfd sp!, {r5 - r8} -8: movs r2, r2, lsl #31 - ldr1b r1, r3, ne, abort=21f - ldr1b r1, r4, cs, abort=21f - ldr1b r1, ip, cs, abort=21f - str1b r0, r3, ne, abort=21f - str1b r0, r4, cs, abort=21f - str1b r0, ip, cs, abort=21f +8: movs r2, r2, lsl #31 + ldr1b r1, r3, ne, abort=21f + ldr1b r1, r4, cs, abort=21f + ldr1b r1, ip, cs, abort=21f + str1b r0, r3, ne, abort=21f + str1b r0, r4, cs, abort=21f + str1b r0, ip, cs, abort=21f - exit r4, pc + exit r4, pc -9: rsb ip, ip, #4 - cmp ip, #2 - ldr1b r1, r3, gt, abort=21f - ldr1b r1, r4, ge, abort=21f - ldr1b r1, lr, abort=21f - str1b r0, r3, gt, abort=21f - str1b r0, r4, ge, abort=21f - subs r2, r2, ip - str1b r0, lr, abort=21f - blt 8b - ands ip, r1, #3 - beq 1b +9: rsb ip, ip, #4 + cmp ip, #2 + ldr1b r1, r3, gt, abort=21f + ldr1b r1, r4, ge, abort=21f + ldr1b r1, lr, abort=21f + str1b r0, r3, gt, abort=21f + str1b r0, r4, ge, abort=21f + subs r2, r2, ip + str1b r0, lr, abort=21f + blt 8b + ands ip, r1, #3 + beq 1b -10: bic r1, r1, #3 - cmp ip, #2 - ldr1w r1, lr, abort=21f - beq 17f - bgt 18f +10: bic r1, r1, #3 + cmp ip, #2 + ldr1w r1, lr, abort=21f + beq 17f + bgt 18f - .macro forward_copy_shift pull push + .macro forward_copy_shift pull push - subs r2, r2, #28 - blt 14f + subs r2, r2, #28 + blt 14f - CALGN( ands ip, r0, #31 ) - CALGN( rsb ip, ip, #32 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) + CALGN( ands ip, r0, #31 ) + CALGN( rsb ip, ip, #32 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) -11: stmfd sp!, {r5 - r9} +11: stmfd sp!, {r5 - r9} - PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) + PLD( pld [r1, #0] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 13f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) -12: PLD( pld [r1, #124] ) -13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov 
r3, lr, pull #\pull - subs r2, r2, #32 - ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push - str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) +12: PLD( pld [r1, #124] ) +13: ldr4w r1, r4, r5, r6, r7, abort=19f + mov r3, lr, pull #\pull + subs r2, r2, #32 + ldr4w r1, r8, r9, ip, lr, abort=19f + orr r3, r3, r4, push #\push + mov r4, r4, pull #\pull + orr r4, r4, r5, push #\push + mov r5, r5, pull #\pull + orr r5, r5, r6, push #\push + mov r6, r6, pull #\pull + orr r6, r6, r7, push #\push + mov r7, r7, pull #\pull + orr r7, r7, r8, push #\push + mov r8, r8, pull #\pull + orr r8, r8, r9, push #\push + mov r9, r9, pull #\pull + orr r9, r9, ip, push #\push + mov ip, ip, pull #\pull + orr ip, ip, lr, push #\push + str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) - ldmfd sp!, {r5 - r9} + ldmfd sp!, {r5 - r9} -14: ands ip, r2, #28 - beq 16f +14: ands ip, r2, #28 + beq 16f -15: mov r3, lr, pull #\pull - ldr1w r1, lr, abort=21f - subs ip, ip, #4 - orr r3, r3, lr, push #\push - str1w r0, r3, abort=21f - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) +15: mov r3, lr, pull #\pull + ldr1w r1, lr, abort=21f + subs ip, ip, #4 + orr r3, r3, lr, push #\push + str1w r0, r3, abort=21f + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) -16: sub r1, r1, #(\push / 8) - b 8b +16: sub r1, r1, #(\push / 8) + b 8b - .endm + .endm - forward_copy_shift pull=8 push=24 + forward_copy_shift pull=8 push=24 -17: forward_copy_shift pull=16 push=16 +17: forward_copy_shift pull=16 push=16 -18: forward_copy_shift pull=24 push=8 +18: forward_copy_shift pull=24 push=8 /* @@ -254,14 +254,14 @@ * the exit macro. */ - .macro copy_abort_preamble -19: ldmfd sp!, {r5 - r9} - b 21f -20: ldmfd sp!, {r5 - r8} + .macro copy_abort_preamble +19: ldmfd sp!, {r5 - r9} + b 21f +20: ldmfd sp!, {r5 - r8} 21: - .endm + .endm - .macro copy_abort_end - ldmfd sp!, {r4, pc} - .endm + .macro copy_abort_end + ldmfd sp!, {r4, pc} + .endm diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/div64.S --- a/xen/arch/arm/lib/div64.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/div64.S Mon Feb 13 17:26:08 2012 +0000 @@ -3,9 +3,9 @@ * * Optimized computation of 64-bit dividend / 32-bit divisor * - * Author: Nicolas Pitre - * Created: Oct 5, 2003 - * Copyright: Monta Vista Software, Inc. + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,7 +14,7 @@ #include <xen/config.h> #include "assembler.h" - + #ifdef __ARMEB__ #define xh r0 #define xl r1 @@ -34,12 +34,12 @@ * This is meant to be used by do_div() from include/asm/div64.h only. 
* * Input parameters: - * xh-xl = dividend (clobbered) - * r4 = divisor (preserved) + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) * * Output values: - * yh-yl = result - * xh = remainder + * yh-yl = result + * xh = remainder * * Clobbered regs: xl, ip */ @@ -47,165 +47,165 @@ ENTRY(__do_div64) UNWIND(.fnstart) - @ Test for easy paths first. - subs ip, r4, #1 - bls 9f @ divisor is 0 or 1 - tst ip, r4 - beq 8f @ divisor is power of 2 + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 - @ See if we need to handle upper 32-bit result. - cmp xh, r4 - mov yh, #0 - blo 3f + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f - @ Align divisor with upper part of dividend. - @ The aligned divisor is stored in yl preserving the original. - @ The bit position is stored in ip. + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. #if __LINUX_ARM_ARCH__ >= 5 - clz yl, r4 - clz ip, xh - sub yl, yl, ip - mov ip, #1 - mov ip, ip, lsl yl - mov yl, r4, lsl yl + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl #else - mov yl, r4 - mov ip, #1 -1: cmp yl, #0x80000000 - cmpcc yl, xh - movcc yl, yl, lsl #1 - movcc ip, ip, lsl #1 - bcc 1b + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b #endif - @ The division loop for needed upper bit positions. - @ Break out early if dividend reaches 0. -2: cmp xh, yl - orrcs yh, yh, ip - subcss xh, xh, yl - movnes ip, ip, lsr #1 - mov yl, yl, lsr #1 - bne 2b + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subcss xh, xh, yl + movnes ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b - @ See if we need to handle lower 32-bit result. -3: cmp xh, #0 - mov yl, #0 - cmpeq xl, r4 - movlo xh, xl - movlo pc, lr + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + movlo pc, lr - @ The division loop for lower bit positions. - @ Here we shift remainer bits leftwards rather than moving the - @ divisor for comparisons, considering the carry-out bit as well. - mov ip, #0x80000000 -4: movs xl, xl, lsl #1 - adcs xh, xh, xh - beq 6f - cmpcc xh, r4 -5: orrcs yl, yl, ip - subcs xh, xh, r4 - movs ip, ip, lsr #1 - bne 4b - mov pc, lr + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well. + mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + mov pc, lr - @ The top part of remainder became zero. If carry is set - @ (the 33th bit) this is a false positive so resume the loop. - @ Otherwise, if lower part is also null then we are done. -6: bcs 5b - cmp xl, #0 - moveq pc, lr + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + moveq pc, lr - @ We still have remainer bits in the low part. Bring them up. + @ We still have remainer bits in the low part. Bring them up. #if __LINUX_ARM_ARCH__ >= 5 - clz xh, xl @ we know xh is zero here so... 
- add xh, xh, #1 - mov xl, xl, lsl xh - mov ip, ip, lsr xh + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh #else -7: movs xl, xl, lsl #1 - mov ip, ip, lsr #1 - bcc 7b +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b #endif - @ Current remainder is now 1. It is worthless to compare with - @ divisor at this point since divisor can not be smaller than 3 here. - @ If possible, branch for another shift in the division loop. - @ If no bit position left then we are done. - movs ip, ip, lsr #1 - mov xh, #1 - bne 4b - mov pc, lr + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + mov pc, lr -8: @ Division by a power of 2: determine what that divisor order is - @ then simply shift values around +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around #if __LINUX_ARM_ARCH__ >= 5 - clz ip, r4 - rsb ip, ip, #31 + clz ip, r4 + rsb ip, ip, #31 #else - mov yl, r4 - cmp r4, #(1 << 16) - mov ip, #0 - movhs yl, yl, lsr #16 - movhs ip, #16 + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 - cmp yl, #(1 << 8) - movhs yl, yl, lsr #8 - addhs ip, ip, #8 + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 - cmp yl, #(1 << 4) - movhs yl, yl, lsr #4 - addhs ip, ip, #4 + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 - cmp yl, #(1 << 2) - addhi ip, ip, #3 - addls ip, ip, yl, lsr #1 + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 #endif - mov yh, xh, lsr ip - mov yl, xl, lsr ip - rsb ip, ip, #32 - ARM( orr yl, yl, xh, lsl ip ) - THUMB( lsl xh, xh, ip ) - THUMB( orr yl, yl, xh ) - mov xh, xl, lsl ip - mov xh, xh, lsr ip - mov pc, lr + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) + mov xh, xl, lsl ip + mov xh, xh, lsr ip + mov pc, lr - @ eq -> division by 1: obvious enough... -9: moveq yl, xl - moveq yh, xh - moveq xh, #0 - moveq pc, lr + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + moveq pc, lr UNWIND(.fnend) UNWIND(.fnstart) UNWIND(.pad #4) UNWIND(.save {lr}) Ldiv0_64: - @ Division by 0: - str lr, [sp, #-8]! - bl __div0 + @ Division by 0: + str lr, [sp, #-8]! + bl __div0 - @ as wrong as it could be... - mov yl, #0 - mov yh, #0 - mov xh, #0 - ldr pc, [sp], #8 + @ as wrong as it could be... + mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #8 UNWIND(.fnend) ENDPROC(__do_div64) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/findbit.S --- a/xen/arch/arm/lib/findbit.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/findbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -24,20 +24,20 @@ * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); */ ENTRY(_find_first_zero_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 + teq r1, #0 + beq 3f + mov r2, #0 1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? 
- blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr ENDPROC(_find_first_zero_bit_le) /* @@ -45,19 +45,19 @@ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) */ ENTRY(_find_next_zero_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit ENDPROC(_find_next_zero_bit_le) /* @@ -65,20 +65,20 @@ * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); */ ENTRY(_find_first_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 + teq r1, #0 + beq 3f + mov r2, #0 1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr ENDPROC(_find_first_bit_le) /* @@ -86,87 +86,87 @@ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) */ ENTRY(_find_next_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit ENDPROC(_find_next_bit_le) #ifdef __ARMEB__ ENTRY(_find_first_zero_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? 
- blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr ENDPROC(_find_first_zero_bit_be) ENTRY(_find_next_zero_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit ENDPROC(_find_next_zero_bit_be) ENTRY(_find_first_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? 
+ blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr ENDPROC(_find_first_bit_be) ENTRY(_find_next_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit ENDPROC(_find_next_bit_be) #endif @@ -176,23 +176,23 @@ */ .L_found: #if __LINUX_ARM_ARCH__ >= 5 - rsb r0, r3, #0 - and r3, r3, r0 - clz r3, r3 - rsb r3, r3, #31 - add r0, r2, r3 + rsb r0, r3, #0 + and r3, r3, r0 + clz r3, r3 + rsb r3, r3, #31 + add r0, r2, r3 #else - tst r3, #0x0f - addeq r2, r2, #4 - movne r3, r3, lsl #4 - tst r3, #0x30 - addeq r2, r2, #2 - movne r3, r3, lsl #2 - tst r3, #0x40 - addeq r2, r2, #1 - mov r0, r2 + tst r3, #0x0f + addeq r2, r2, #4 + movne r3, r3, lsl #4 + tst r3, #0x30 + addeq r2, r2, #2 + movne r3, r3, lsl #2 + tst r3, #0x40 + addeq r2, r2, #1 + mov r0, r2 #endif - cmp r1, r0 @ Clamp to maxbit - movlo r0, r1 - mov pc, lr + cmp r1, r0 @ Clamp to maxbit + movlo r0, r1 + mov pc, lr diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/lib1funcs.S --- a/xen/arch/arm/lib/lib1funcs.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/lib1funcs.S Mon Feb 13 17:26:08 2012 +0000 @@ -40,64 +40,64 @@ #if __LINUX_ARM_ARCH__ >= 5 - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + #else - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. 
+1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b - mov \result, #0 + mov \result, #0 #endif - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b .endm @@ -106,27 +106,27 @@ #if __LINUX_ARM_ARCH__ >= 5 - clz \order, \divisor - rsb \order, \order, #31 + clz \order, \divisor + rsb \order, \order, #31 #else - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 #endif @@ -137,69 +137,69 @@ #if __LINUX_ARM_ARCH__ >= 5 - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order #else - mov \order, #0 + mov \order, #0 - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. 
-1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b #endif - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. - subs \order, \order, #3 @ yes, 3 is intended here - blt 2f + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b - tst \order, #3 - teqne \dividend, #0 - beq 5f + tst \order, #3 + teqne \dividend, #0 + beq 5f - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor + @ Either 1, 2 or 3 comparison/substractions are left. 
+2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor 5: .endm @@ -208,27 +208,27 @@ ENTRY(__aeabi_uidiv) UNWIND(.fnstart) - subs r2, r1, #1 - moveq pc, lr - bcc Ldiv0 - cmp r0, r1 - bls 11f - tst r1, r2 - beq 12f + subs r2, r1, #1 + moveq pc, lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f - ARM_DIV_BODY r0, r1, r2, r3 + ARM_DIV_BODY r0, r1, r2, r3 - mov r0, r2 - mov pc, lr + mov r0, r2 + mov pc, lr -11: moveq r0, #1 - movne r0, #0 - mov pc, lr +11: moveq r0, #1 + movne r0, #0 + mov pc, lr -12: ARM_DIV2_ORDER r1, r2 +12: ARM_DIV2_ORDER r1, r2 - mov r0, r0, lsr r2 - mov pc, lr + mov r0, r0, lsr r2 + mov pc, lr UNWIND(.fnend) ENDPROC(__udivsi3) @@ -237,17 +237,17 @@ ENTRY(__umodsi3) UNWIND(.fnstart) - subs r2, r1, #1 @ compare divisor with 1 - bcc Ldiv0 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - movls pc, lr + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + movls pc, lr - ARM_MOD_BODY r0, r1, r2, r3 + ARM_MOD_BODY r0, r1, r2, r3 - mov pc, lr + mov pc, lr UNWIND(.fnend) ENDPROC(__umodsi3) @@ -256,40 +256,40 @@ ENTRY(__aeabi_idiv) UNWIND(.fnstart) - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f - ARM_DIV_BODY r3, r1, r0, r2 + ARM_DIV_BODY r3, r1, r0, r2 - cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr + cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - mov pc, lr +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + mov pc, lr -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - mov pc, lr +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + mov pc, lr -12: ARM_DIV2_ORDER r1, r2 +12: ARM_DIV2_ORDER r1, r2 - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - mov pc, lr + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + mov pc, lr UNWIND(.fnend) ENDPROC(__divsi3) @@ -298,23 +298,23 @@ ENTRY(__modsi3) UNWIND(.fnstart) - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. 
+ movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f - ARM_MOD_BODY r0, r1, r2, r3 + ARM_MOD_BODY r0, r1, r2, r3 -10: cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr +10: cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr UNWIND(.fnend) ENDPROC(__modsi3) @@ -323,56 +323,56 @@ ENTRY(__aeabi_uidivmod) UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) +UNWIND(.save {r0, r1, ip, lr} ) - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_uidiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr UNWIND(.fnend) ENDPROC(__aeabi_uidivmod) ENTRY(__aeabi_idivmod) UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_idiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr +UNWIND(.save {r0, r1, ip, lr} ) + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr UNWIND(.fnend) ENDPROC(__aeabi_idivmod) ENTRY(__aeabi_uldivmod) UNWIND(.fnstart) -UNWIND(.save {lr} ) - sub sp, sp, #8 - stmfd sp!, {sp, lr} - bl __qdivrem - ldr lr, [sp, #4] - add sp, sp, #8 - ldmfd sp!, {r2, r3} - mov pc, lr +UNWIND(.save {lr} ) + sub sp, sp, #8 + stmfd sp!, {sp, lr} + bl __qdivrem + ldr lr, [sp, #4] + add sp, sp, #8 + ldmfd sp!, {r2, r3} + mov pc, lr UNWIND(.fnend) ENDPROC(__aeabi_uldivmod) ENTRY(__aeabi_ldivmod) UNWIND(.fnstart) -UNWIND(.save {lr} ) - sub sp, sp, #16 - stmfd sp!, {sp, lr} - bl __ldivmod_helper - ldr lr, [sp, #4] - add sp, sp, #16 - ldmfd sp!, {r2, r3} - mov pc, lr - +UNWIND(.save {lr} ) + sub sp, sp, #16 + stmfd sp!, {sp, lr} + bl __ldivmod_helper + ldr lr, [sp, #4] + add sp, sp, #16 + ldmfd sp!, {r2, r3} + mov pc, lr + UNWIND(.fnend) ENDPROC(__aeabi_ldivmod) #endif @@ -381,9 +381,9 @@ UNWIND(.fnstart) UNWIND(.pad #4) UNWIND(.save {lr}) - str lr, [sp, #-8]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #8 + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #8 UNWIND(.fnend) ENDPROC(Ldiv0) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/memcpy.S --- a/xen/arch/arm/lib/memcpy.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/memcpy.S Mon Feb 13 17:26:08 2012 +0000 @@ -1,9 +1,9 @@ /* * linux/arch/arm/lib/memcpy.S * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,46 +13,46 @@ #include <xen/config.h> #include "assembler.h" -#define LDR1W_SHIFT 0 -#define STR1W_SHIFT 0 +#define LDR1W_SHIFT 0 +#define STR1W_SHIFT 0 - .macro ldr1w ptr reg abort - W(ldr) \reg, [\ptr], #4 - .endm + .macro ldr1w ptr reg abort + W(ldr) \reg, [\ptr], #4 + .endm - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} - .endm + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm - .macro ldr1b ptr reg cond=al abort - ldr\cond\()b \reg, [\ptr], #1 - .endm + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm - .macro str1w ptr reg abort - W(str) \reg, [\ptr], #4 - .endm + .macro str1w ptr reg abort + W(str) \reg, [\ptr], #4 + .endm - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm - .macro str1b ptr reg cond=al abort - str\cond\()b \reg, [\ptr], #1 - .endm + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm - .macro enter reg1 reg2 - stmdb sp!, {r0, \reg1, \reg2} - .endm + .macro enter reg1 reg2 + stmdb sp!, {r0, \reg1, \reg2} + .endm - .macro exit reg1 reg2 - ldmfd sp!, {r0, \reg1, \reg2} - .endm + .macro exit reg1 reg2 + ldmfd sp!, {r0, \reg1, \reg2} + .endm - .text + .text /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/memmove.S --- a/xen/arch/arm/lib/memmove.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/memmove.S Mon Feb 13 17:26:08 2012 +0000 @@ -1,9 +1,9 @@ /* * linux/arch/arm/lib/memmove.S * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: (C) MontaVista Software Inc. + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: (C) MontaVista Software Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,7 +14,7 @@ #include "assembler.h" - .text + .text /* * Prototype: void *memmove(void *dest, const void *src, size_t n); @@ -29,172 +29,172 @@ ENTRY(memmove) - subs ip, r0, r1 - cmphi r2, ip - bls memcpy + subs ip, r0, r1 + cmphi r2, ip + bls memcpy - stmfd sp!, {r0, r4, lr} - add r1, r1, r2 - add r0, r0, r2 - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #-4] ) - bne 9f - ands ip, r1, #3 - bne 10f + stmfd sp!, {r0, r4, lr} + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #-4] ) + bne 9f + ands ip, r1, #3 + bne 10f -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f - CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, ip ) @ C is set here - CALGN( rsb ip, ip, #32 ) - CALGN( add pc, r4, ip ) + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, ip ) @ C is set here + CALGN( rsb ip, ip, #32 ) + CALGN( add pc, r4, ip ) - PLD( pld [r1, #-4] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 4f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) + PLD( pld [r1, #-4] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 4f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) -3: PLD( pld [r1, #-128] ) -4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} - subs r2, r2, #32 - stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) +3: PLD( pld [r1, #-128] ) +4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) -5: ands ip, r2, #28 - rsb ip, ip, #32 - addne pc, pc, ip @ C is always clear here - b 7f -6: W(nop) - W(ldr) r3, [r1, #-4]! - W(ldr) r4, [r1, #-4]! - W(ldr) r5, [r1, #-4]! - W(ldr) r6, [r1, #-4]! - W(ldr) r7, [r1, #-4]! - W(ldr) r8, [r1, #-4]! - W(ldr) lr, [r1, #-4]! +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: W(nop) + W(ldr) r3, [r1, #-4]! + W(ldr) r4, [r1, #-4]! + W(ldr) r5, [r1, #-4]! + W(ldr) r6, [r1, #-4]! + W(ldr) r7, [r1, #-4]! + W(ldr) r8, [r1, #-4]! + W(ldr) lr, [r1, #-4]! - add pc, pc, ip - nop - W(nop) - W(str) r3, [r0, #-4]! - W(str) r4, [r0, #-4]! - W(str) r5, [r0, #-4]! - W(str) r6, [r0, #-4]! - W(str) r7, [r0, #-4]! - W(str) r8, [r0, #-4]! - W(str) lr, [r0, #-4]! + add pc, pc, ip + nop + W(nop) + W(str) r3, [r0, #-4]! + W(str) r4, [r0, #-4]! + W(str) r5, [r0, #-4]! + W(str) r6, [r0, #-4]! + W(str) r7, [r0, #-4]! + W(str) r8, [r0, #-4]! + W(str) lr, [r0, #-4]! - CALGN( bcs 2b ) + CALGN( bcs 2b ) -7: ldmfd sp!, {r5 - r8} +7: ldmfd sp!, {r5 - r8} -8: movs r2, r2, lsl #31 - ldrneb r3, [r1, #-1]! - ldrcsb r4, [r1, #-1]! - ldrcsb ip, [r1, #-1] - strneb r3, [r0, #-1]! - strcsb r4, [r0, #-1]! - strcsb ip, [r0, #-1] - ldmfd sp!, {r0, r4, pc} +8: movs r2, r2, lsl #31 + ldrneb r3, [r1, #-1]! + ldrcsb r4, [r1, #-1]! + ldrcsb ip, [r1, #-1] + strneb r3, [r0, #-1]! + strcsb r4, [r0, #-1]! + strcsb ip, [r0, #-1] + ldmfd sp!, {r0, r4, pc} -9: cmp ip, #2 - ldrgtb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! - ldrb lr, [r1, #-1]! - strgtb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! - subs r2, r2, ip - strb lr, [r0, #-1]! 
- blt 8b - ands ip, r1, #3 - beq 1b +9: cmp ip, #2 + ldrgtb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrb lr, [r1, #-1]! + strgtb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + subs r2, r2, ip + strb lr, [r0, #-1]! + blt 8b + ands ip, r1, #3 + beq 1b -10: bic r1, r1, #3 - cmp ip, #2 - ldr r3, [r1, #0] - beq 17f - blt 18f +10: bic r1, r1, #3 + cmp ip, #2 + ldr r3, [r1, #0] + beq 17f + blt 18f - .macro backward_copy_shift push pull + .macro backward_copy_shift push pull - subs r2, r2, #28 - blt 14f + subs r2, r2, #28 + blt 14f - CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) -11: stmfd sp!, {r5 - r9} +11: stmfd sp!, {r5 - r9} - PLD( pld [r1, #-4] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 13f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) + PLD( pld [r1, #-4] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 13f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) -12: PLD( pld [r1, #-128] ) -13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, push #\push - subs r2, r2, #32 - ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, pull #\pull - mov ip, ip, push #\push - orr ip, ip, r9, pull #\pull - mov r9, r9, push #\push - orr r9, r9, r8, pull #\pull - mov r8, r8, push #\push - orr r8, r8, r7, pull #\pull - mov r7, r7, push #\push - orr r7, r7, r6, pull #\pull - mov r6, r6, push #\push - orr r6, r6, r5, pull #\pull - mov r5, r5, push #\push - orr r5, r5, r4, pull #\pull - mov r4, r4, push #\push - orr r4, r4, r3, pull #\pull - stmdb r0!, {r4 - r9, ip, lr} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) +12: PLD( pld [r1, #-128] ) +13: ldmdb r1!, {r7, r8, r9, ip} + mov lr, r3, push #\push + subs r2, r2, #32 + ldmdb r1!, {r3, r4, r5, r6} + orr lr, lr, ip, pull #\pull + mov ip, ip, push #\push + orr ip, ip, r9, pull #\pull + mov r9, r9, push #\push + orr r9, r9, r8, pull #\pull + mov r8, r8, push #\push + orr r8, r8, r7, pull #\pull + mov r7, r7, push #\push + orr r7, r7, r6, pull #\pull + mov r6, r6, push #\push + orr r6, r6, r5, pull #\pull + mov r5, r5, push #\push + orr r5, r5, r4, pull #\pull + mov r4, r4, push #\push + orr r4, r4, r3, pull #\pull + stmdb r0!, {r4 - r9, ip, lr} + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) - ldmfd sp!, {r5 - r9} + ldmfd sp!, {r5 - r9} -14: ands ip, r2, #28 - beq 16f +14: ands ip, r2, #28 + beq 16f -15: mov lr, r3, push #\push - ldr r3, [r1, #-4]! - subs ip, ip, #4 - orr lr, lr, r3, pull #\pull - str lr, [r0, #-4]! - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) +15: mov lr, r3, push #\push + ldr r3, [r1, #-4]! + subs ip, ip, #4 + orr lr, lr, r3, pull #\pull + str lr, [r0, #-4]! + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) -16: add r1, r1, #(\pull / 8) - b 8b +16: add r1, r1, #(\pull / 8) + b 8b - .endm + .endm - backward_copy_shift push=8 pull=24 + backward_copy_shift push=8 pull=24 -17: backward_copy_shift push=16 pull=16 +17: backward_copy_shift push=16 pull=16 -18: backward_copy_shift push=24 pull=8 +18: backward_copy_shift push=24 pull=8 ENDPROC(memmove) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/memset.S --- a/xen/arch/arm/lib/memset.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/memset.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,33 +14,33 @@ #include "assembler.h" - .text - .align 5 - .word 0 + .text + .align 5 + .word 0 -1: subs r2, r2, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? 
- cmp r3, #2 @ 1 - strltb r1, [r0], #1 @ 1 - strleb r1, [r0], #1 @ 1 - strb r1, [r0], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) +1: subs r2, r2, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [r0], #1 @ 1 + strleb r1, [r0], #1 @ 1 + strb r1, [r0], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) /* * The pointer is now aligned and the length is adjusted. Try doing the * memset again. */ ENTRY(memset) - ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 /* * we know that the pointer in r0 is aligned to a word boundary. */ - orr r1, r1, r1, lsl #8 - orr r1, r1, r1, lsl #16 - mov r3, r1 - cmp r2, #16 - blt 4f + orr r1, r1, r1, lsl #8 + orr r1, r1, r1, lsl #16 + mov r3, r1 + cmp r2, #16 + blt 4f #if ! CALGN(1)+0 @@ -48,26 +48,26 @@ * We need an extra register for this loop - save the return address and * use the LR */ - str lr, [sp, #-4]! - mov ip, r1 - mov lr, r1 + str lr, [sp, #-4]! + mov ip, r1 + mov lr, r1 -2: subs r2, r2, #64 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - bgt 2b - ldmeqfd sp!, {pc} @ Now <64 bytes to go. +2: subs r2, r2, #64 + stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + bgt 2b + ldmeqfd sp!, {pc} @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ - tst r2, #32 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} - tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - ldr lr, [sp], #4 + tst r2, #32 + stmneia r0!, {r1, r3, ip, lr} + stmneia r0!, {r1, r3, ip, lr} + tst r2, #16 + stmneia r0!, {r1, r3, ip, lr} + ldr lr, [sp], #4 #else @@ -76,54 +76,54 @@ * whole cache lines at once. */ - stmfd sp!, {r4-r7, lr} - mov r4, r1 - mov r5, r1 - mov r6, r1 - mov r7, r1 - mov ip, r1 - mov lr, r1 + stmfd sp!, {r4-r7, lr} + mov r4, r1 + mov r5, r1 + mov r6, r1 + mov r7, r1 + mov ip, r1 + mov lr, r1 - cmp r2, #96 - tstgt r0, #31 - ble 3f + cmp r2, #96 + tstgt r0, #31 + ble 3f - and ip, r0, #31 - rsb ip, ip, #32 - sub r2, r2, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - tst ip, #(1 << 30) - mov ip, r1 - strne r1, [r0], #4 + and ip, r0, #31 + rsb ip, ip, #32 + sub r2, r2, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + tst ip, #(1 << 30) + mov ip, r1 + strne r1, [r0], #4 -3: subs r2, r2, #64 - stmgeia r0!, {r1, r3-r7, ip, lr} - stmgeia r0!, {r1, r3-r7, ip, lr} - bgt 3b - ldmeqfd sp!, {r4-r7, pc} +3: subs r2, r2, #64 + stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia r0!, {r1, r3-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} - tst r2, #32 - stmneia r0!, {r1, r3-r7, ip, lr} - tst r2, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + tst r2, #32 + stmneia r0!, {r1, r3-r7, ip, lr} + tst r2, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} #endif -4: tst r2, #8 - stmneia r0!, {r1, r3} - tst r2, #4 - strne r1, [r0], #4 +4: tst r2, #8 + stmneia r0!, {r1, r3} + tst r2, #4 + strne r1, [r0], #4 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. 
*/ -5: tst r2, #2 - strneb r1, [r0], #1 - strneb r1, [r0], #1 - tst r2, #1 - strneb r1, [r0], #1 - mov pc, lr +5: tst r2, #2 + strneb r1, [r0], #1 + strneb r1, [r0], #1 + tst r2, #1 + strneb r1, [r0], #1 + mov pc, lr ENDPROC(memset) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/memzero.S --- a/xen/arch/arm/lib/memzero.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/memzero.S Mon Feb 13 17:26:08 2012 +0000 @@ -12,35 +12,35 @@ #include "assembler.h" - .text - .align 5 - .word 0 + .text + .align 5 + .word 0 /* * Align the pointer in r0. r3 contains the number of bytes that we are * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we * don't bother; we use byte stores instead. */ -1: subs r1, r1, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r2, [r0], #1 @ 1 - strleb r2, [r0], #1 @ 1 - strb r2, [r0], #1 @ 1 - add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) +1: subs r1, r1, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r2, [r0], #1 @ 1 + strleb r2, [r0], #1 @ 1 + strb r2, [r0], #1 @ 1 + add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) /* * The pointer is now aligned and the length is adjusted. Try doing the * memzero again. */ ENTRY(__memzero) - mov r2, #0 @ 1 - ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 + mov r2, #0 @ 1 + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 /* * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary. */ - cmp r1, #16 @ 1 we can skip this chunk if we - blt 4f @ 1 have < 16 bytes + cmp r1, #16 @ 1 we can skip this chunk if we + blt 4f @ 1 have < 16 bytes #if ! CALGN(1)+0 @@ -48,26 +48,26 @@ * We need an extra register for this loop - save the return address and * use the LR */ - str lr, [sp, #-4]! @ 1 - mov ip, r2 @ 1 - mov lr, r2 @ 1 + str lr, [sp, #-4]! @ 1 + mov ip, r2 @ 1 + mov lr, r2 @ 1 -3: subs r1, r1, #64 @ 1 write 32 bytes out per loop - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - bgt 3b @ 1 - ldmeqfd sp!, {pc} @ 1/2 quick exit +3: subs r1, r1, #64 @ 1 write 32 bytes out per loop + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + bgt 3b @ 1 + ldmeqfd sp!, {pc} @ 1/2 quick exit /* * No need to correct the count; we're only testing bits from now on */ - tst r1, #32 @ 1 - stmneia r0!, {r2, r3, ip, lr} @ 4 - stmneia r0!, {r2, r3, ip, lr} @ 4 - tst r1, #16 @ 1 16 bytes or more? - stmneia r0!, {r2, r3, ip, lr} @ 4 - ldr lr, [sp], #4 @ 1 + tst r1, #32 @ 1 + stmneia r0!, {r2, r3, ip, lr} @ 4 + stmneia r0!, {r2, r3, ip, lr} @ 4 + tst r1, #16 @ 1 16 bytes or more? + stmneia r0!, {r2, r3, ip, lr} @ 4 + ldr lr, [sp], #4 @ 1 #else @@ -76,52 +76,52 @@ * whole cache lines at once. 
*/ - stmfd sp!, {r4-r7, lr} - mov r4, r2 - mov r5, r2 - mov r6, r2 - mov r7, r2 - mov ip, r2 - mov lr, r2 + stmfd sp!, {r4-r7, lr} + mov r4, r2 + mov r5, r2 + mov r6, r2 + mov r7, r2 + mov ip, r2 + mov lr, r2 - cmp r1, #96 - andgts ip, r0, #31 - ble 3f + cmp r1, #96 + andgts ip, r0, #31 + ble 3f - rsb ip, ip, #32 - sub r1, r1, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - movs ip, ip, lsl #2 - strcs r2, [r0], #4 + rsb ip, ip, #32 + sub r1, r1, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + movs ip, ip, lsl #2 + strcs r2, [r0], #4 -3: subs r1, r1, #64 - stmgeia r0!, {r2-r7, ip, lr} - stmgeia r0!, {r2-r7, ip, lr} - bgt 3b - ldmeqfd sp!, {r4-r7, pc} +3: subs r1, r1, #64 + stmgeia r0!, {r2-r7, ip, lr} + stmgeia r0!, {r2-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} - tst r1, #32 - stmneia r0!, {r2-r7, ip, lr} - tst r1, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} + tst r1, #32 + stmneia r0!, {r2-r7, ip, lr} + tst r1, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} #endif -4: tst r1, #8 @ 1 8 bytes or more? - stmneia r0!, {r2, r3} @ 2 - tst r1, #4 @ 1 4 bytes or more? - strne r2, [r0], #4 @ 1 +4: tst r1, #8 @ 1 8 bytes or more? + stmneia r0!, {r2, r3} @ 2 + tst r1, #4 @ 1 4 bytes or more? + strne r2, [r0], #4 @ 1 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. */ -5: tst r1, #2 @ 1 2 bytes or more? - strneb r2, [r0], #1 @ 1 - strneb r2, [r0], #1 @ 1 - tst r1, #1 @ 1 a byte left over - strneb r2, [r0], #1 @ 1 - mov pc, lr @ 1 +5: tst r1, #2 @ 1 2 bytes or more? + strneb r2, [r0], #1 @ 1 + strneb r2, [r0], #1 @ 1 + tst r1, #1 @ 1 a byte left over + strneb r2, [r0], #1 @ 1 + mov pc, lr @ 1 ENDPROC(__memzero) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/setbit.S --- a/xen/arch/arm/lib/setbit.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/setbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -11,8 +11,8 @@ #include "assembler.h" #include "bitops.h" - .text + .text ENTRY(_set_bit) - bitop orr + bitop orr ENDPROC(_set_bit) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/testchangebit.S --- a/xen/arch/arm/lib/testchangebit.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/testchangebit.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,5 +14,5 @@ .text ENTRY(_test_and_change_bit) - testop eor, str + testop eor, str ENDPROC(_test_and_change_bit) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/testclearbit.S --- a/xen/arch/arm/lib/testclearbit.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/testclearbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,5 +14,5 @@ .text ENTRY(_test_and_clear_bit) - testop bicne, strne + testop bicne, strne ENDPROC(_test_and_clear_bit) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/arch/arm/lib/testsetbit.S --- a/xen/arch/arm/lib/testsetbit.S Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/arch/arm/lib/testsetbit.S Mon Feb 13 17:26:08 2012 +0000 @@ -14,5 +14,5 @@ .text ENTRY(_test_and_set_bit) - testop orreq, streq + testop orreq, streq ENDPROC(_test_and_set_bit) diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/include/asm-arm/bitops.h --- a/xen/include/asm-arm/bitops.h Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/include/asm-arm/bitops.h Mon Feb 13 17:26:08 2012 +0000 @@ -115,19 +115,19 @@ /* * These are the little endian, atomic definitions. 
*/ -#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz) -#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off) -#define find_first_bit(p,sz) _find_first_bit_le(p,sz) -#define find_next_bit(p,sz,off) _find_next_bit_le(p,sz,off) +#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz) +#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off) +#define find_first_bit(p,sz) _find_first_bit_le(p,sz) +#define find_next_bit(p,sz,off) _find_next_bit_le(p,sz,off) #else /* * These are the big endian, atomic definitions. */ -#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz) -#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off) -#define find_first_bit(p,sz) _find_first_bit_be(p,sz) -#define find_next_bit(p,sz,off) _find_next_bit_be(p,sz,off) +#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz) +#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off) +#define find_first_bit(p,sz) _find_first_bit_be(p,sz) +#define find_next_bit(p,sz,off) _find_next_bit_be(p,sz,off) #endif diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/include/asm-arm/div64.h --- a/xen/include/asm-arm/div64.h Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/include/asm-arm/div64.h Mon Feb 13 17:26:08 2012 +0000 @@ -10,9 +10,9 @@ * * uint32_t do_div(uint64_t *n, uint32_t base) * { - * uint32_t remainder = *n % base; - * *n = *n / base; - * return remainder; + * uint32_t remainder = *n % base; + * *n = *n / base; + * return remainder; * } * * In other words, a 64-bit dividend with a 32-bit divisor producing @@ -29,22 +29,22 @@ #define __xh "r1" #endif -#define __do_div_asm(n, base) \ -({ \ - register unsigned int __base asm("r4") = base; \ - register unsigned long long __n asm("r0") = n; \ - register unsigned long long __res asm("r2"); \ - register unsigned int __rem asm(__xh); \ - asm( __asmeq("%0", __xh) \ - __asmeq("%1", "r2") \ - __asmeq("%2", "r0") \ - __asmeq("%3", "r4") \ - "bl __do_div64" \ - : "=r" (__rem), "=r" (__res) \ - : "r" (__n), "r" (__base) \ - : "ip", "lr", "cc"); \ - n = __res; \ - __rem; \ +#define __do_div_asm(n, base) \ +({ \ + register unsigned int __base asm("r4") = base; \ + register unsigned long long __n asm("r0") = n; \ + register unsigned long long __res asm("r2"); \ + register unsigned int __rem asm(__xh); \ + asm( __asmeq("%0", __xh) \ + __asmeq("%1", "r2") \ + __asmeq("%2", "r0") \ + __asmeq("%3", "r4") \ + "bl __do_div64" \ + : "=r" (__rem), "=r" (__res) \ + : "r" (__n), "r" (__base) \ + : "ip", "lr", "cc"); \ + n = __res; \ + __rem; \ }) #if __GNUC__ < 4 @@ -71,155 +71,155 @@ * sufficiently recent to perform proper long long constant propagation. * (It is unfortunate that gcc doesn't perform all this internally.) */ -#define do_div(n, base) \ -({ \ - unsigned int __r, __b = (base); \ - if (!__builtin_constant_p(__b) || __b == 0) { \ - /* non-constant divisor (or zero): slow path */ \ - __r = __do_div_asm(n, __b); \ - } else if ((__b & (__b - 1)) == 0) { \ - /* Trivial: __b is constant and a power of 2 */ \ - /* gcc does the right thing with this code. */ \ - __r = n; \ - __r &= (__b - 1); \ - n /= __b; \ - } else { \ - /* Multiply by inverse of __b: n/b = n*(p/b)/p */ \ - /* We rely on the fact that most of this code gets */ \ - /* optimized away at compile time due to constant */ \ - /* propagation and only a couple inline assembly */ \ - /* instructions should remain. Better avoid any */ \ - /* code construct that might prevent that. 
*/ \ - unsigned long long __res, __x, __t, __m, __n = n; \ - unsigned int __c, __p, __z = 0; \ - /* preserve low part of n for reminder computation */ \ - __r = __n; \ - /* determine number of bits to represent __b */ \ - __p = 1 << __div64_fls(__b); \ - /* compute __m = ((__p << 64) + __b - 1) / __b */ \ - __m = (~0ULL / __b) * __p; \ - __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \ - /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */ \ - __x = ~0ULL / __b * __b - 1; \ - __res = (__m & 0xffffffff) * (__x & 0xffffffff); \ - __res >>= 32; \ - __res += (__m & 0xffffffff) * (__x >> 32); \ - __t = __res; \ - __res += (__x & 0xffffffff) * (__m >> 32); \ - __t = (__res < __t) ? (1ULL << 32) : 0; \ - __res = (__res >> 32) + __t; \ - __res += (__m >> 32) * (__x >> 32); \ - __res /= __p; \ - /* Now sanitize and optimize what we've got. */ \ - if (~0ULL % (__b / (__b & -__b)) == 0) { \ - /* those cases can be simplified with: */ \ - __n /= (__b & -__b); \ - __m = ~0ULL / (__b / (__b & -__b)); \ - __p = 1; \ - __c = 1; \ - } else if (__res != __x / __b) { \ - /* We can't get away without a correction */ \ - /* to compensate for bit truncation errors. */ \ - /* To avoid it we'd need an additional bit */ \ - /* to represent __m which would overflow it. */ \ - /* Instead we do m=p/b and n/b=(n*m+m)/p. */ \ - __c = 1; \ - /* Compute __m = (__p << 64) / __b */ \ - __m = (~0ULL / __b) * __p; \ - __m += ((~0ULL % __b + 1) * __p) / __b; \ - } else { \ - /* Reduce __m/__p, and try to clear bit 31 */ \ - /* of __m when possible otherwise that'll */ \ - /* need extra overflow handling later. */ \ - unsigned int __bits = -(__m & -__m); \ - __bits |= __m >> 32; \ - __bits = (~__bits) << 1; \ - /* If __bits == 0 then setting bit 31 is */ \ - /* unavoidable. Simply apply the maximum */ \ - /* possible reduction in that case. */ \ - /* Otherwise the MSB of __bits indicates the */ \ - /* best reduction we should apply. */ \ - if (!__bits) { \ - __p /= (__m & -__m); \ - __m /= (__m & -__m); \ - } else { \ - __p >>= __div64_fls(__bits); \ - __m >>= __div64_fls(__bits); \ - } \ - /* No correction needed. */ \ - __c = 0; \ - } \ - /* Now we have a combination of 2 conditions: */ \ - /* 1) whether or not we need a correction (__c), and */ \ - /* 2) whether or not there might be an overflow in */ \ - /* the cross product (__m & ((1<<63) | (1<<31))) */ \ - /* Select the best insn combination to perform the */ \ - /* actual __m * __n / (__p << 64) operation. 
*/ \ - if (!__c) { \ - asm ( "umull %Q0, %R0, %1, %Q2\n\t" \ - "mov %Q0, #0" \ - : "=&r" (__res) \ - : "r" (__m), "r" (__n) \ - : "cc" ); \ - } else if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ - __res = __m; \ - asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" \ - "mov %Q0, #0" \ - : "+&r" (__res) \ - : "r" (__m), "r" (__n) \ - : "cc" ); \ - } else { \ - asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \ - "cmn %Q0, %Q1\n\t" \ - "adcs %R0, %R0, %R1\n\t" \ - "adc %Q0, %3, #0" \ - : "=&r" (__res) \ - : "r" (__m), "r" (__n), "r" (__z) \ - : "cc" ); \ - } \ - if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ - asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" \ - "umlal %R0, %Q0, %Q1, %R2\n\t" \ - "mov %R0, #0\n\t" \ - "umlal %Q0, %R0, %R1, %R2" \ - : "+&r" (__res) \ - : "r" (__m), "r" (__n) \ - : "cc" ); \ - } else { \ - asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" \ - "umlal %R0, %1, %Q2, %R3\n\t" \ - "mov %R0, #0\n\t" \ - "adds %Q0, %1, %Q0\n\t" \ - "adc %R0, %R0, #0\n\t" \ - "umlal %Q0, %R0, %R2, %R3" \ - : "+&r" (__res), "+&r" (__z) \ - : "r" (__m), "r" (__n) \ - : "cc" ); \ - } \ - __res /= __p; \ - /* The reminder can be computed with 32-bit regs */ \ - /* only, and gcc is good at that. */ \ - { \ - unsigned int __res0 = __res; \ - unsigned int __b0 = __b; \ - __r -= __res0 * __b0; \ - } \ - /* BUG_ON(__r >= __b || __res * __b + __r != n); */ \ - n = __res; \ - } \ - __r; \ +#define do_div(n, base) \ +({ \ + unsigned int __r, __b = (base); \ + if (!__builtin_constant_p(__b) || __b == 0) { \ + /* non-constant divisor (or zero): slow path */ \ + __r = __do_div_asm(n, __b); \ + } else if ((__b & (__b - 1)) == 0) { \ + /* Trivial: __b is constant and a power of 2 */ \ + /* gcc does the right thing with this code. */ \ + __r = n; \ + __r &= (__b - 1); \ + n /= __b; \ + } else { \ + /* Multiply by inverse of __b: n/b = n*(p/b)/p */ \ + /* We rely on the fact that most of this code gets */ \ + /* optimized away at compile time due to constant */ \ + /* propagation and only a couple inline assembly */ \ + /* instructions should remain. Better avoid any */ \ + /* code construct that might prevent that. */ \ + unsigned long long __res, __x, __t, __m, __n = n; \ + unsigned int __c, __p, __z = 0; \ + /* preserve low part of n for reminder computation */ \ + __r = __n; \ + /* determine number of bits to represent __b */ \ + __p = 1 << __div64_fls(__b); \ + /* compute __m = ((__p << 64) + __b - 1) / __b */ \ + __m = (~0ULL / __b) * __p; \ + __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \ + /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */ \ + __x = ~0ULL / __b * __b - 1; \ + __res = (__m & 0xffffffff) * (__x & 0xffffffff); \ + __res >>= 32; \ + __res += (__m & 0xffffffff) * (__x >> 32); \ + __t = __res; \ + __res += (__x & 0xffffffff) * (__m >> 32); \ + __t = (__res < __t) ? (1ULL << 32) : 0; \ + __res = (__res >> 32) + __t; \ + __res += (__m >> 32) * (__x >> 32); \ + __res /= __p; \ + /* Now sanitize and optimize what we've got. */ \ + if (~0ULL % (__b / (__b & -__b)) == 0) { \ + /* those cases can be simplified with: */ \ + __n /= (__b & -__b); \ + __m = ~0ULL / (__b / (__b & -__b)); \ + __p = 1; \ + __c = 1; \ + } else if (__res != __x / __b) { \ + /* We can't get away without a correction */ \ + /* to compensate for bit truncation errors. */ \ + /* To avoid it we'd need an additional bit */ \ + /* to represent __m which would overflow it. */ \ + /* Instead we do m=p/b and n/b=(n*m+m)/p. 
*/ \ + __c = 1; \ + /* Compute __m = (__p << 64) / __b */ \ + __m = (~0ULL / __b) * __p; \ + __m += ((~0ULL % __b + 1) * __p) / __b; \ + } else { \ + /* Reduce __m/__p, and try to clear bit 31 */ \ + /* of __m when possible otherwise that'll */ \ + /* need extra overflow handling later. */ \ + unsigned int __bits = -(__m & -__m); \ + __bits |= __m >> 32; \ + __bits = (~__bits) << 1; \ + /* If __bits == 0 then setting bit 31 is */ \ + /* unavoidable. Simply apply the maximum */ \ + /* possible reduction in that case. */ \ + /* Otherwise the MSB of __bits indicates the */ \ + /* best reduction we should apply. */ \ + if (!__bits) { \ + __p /= (__m & -__m); \ + __m /= (__m & -__m); \ + } else { \ + __p >>= __div64_fls(__bits); \ + __m >>= __div64_fls(__bits); \ + } \ + /* No correction needed. */ \ + __c = 0; \ + } \ + /* Now we have a combination of 2 conditions: */ \ + /* 1) whether or not we need a correction (__c), and */ \ + /* 2) whether or not there might be an overflow in */ \ + /* the cross product (__m & ((1<<63) | (1<<31))) */ \ + /* Select the best insn combination to perform the */ \ + /* actual __m * __n / (__p << 64) operation. */ \ + if (!__c) { \ + asm ( "umull %Q0, %R0, %1, %Q2\n\t" \ + "mov %Q0, #0" \ + : "=&r" (__res) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } else if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ + __res = __m; \ + asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" \ + "mov %Q0, #0" \ + : "+&r" (__res) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } else { \ + asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \ + "cmn %Q0, %Q1\n\t" \ + "adcs %R0, %R0, %R1\n\t" \ + "adc %Q0, %3, #0" \ + : "=&r" (__res) \ + : "r" (__m), "r" (__n), "r" (__z) \ + : "cc" ); \ + } \ + if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \ + asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" \ + "umlal %R0, %Q0, %Q1, %R2\n\t" \ + "mov %R0, #0\n\t" \ + "umlal %Q0, %R0, %R1, %R2" \ + : "+&r" (__res) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } else { \ + asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" \ + "umlal %R0, %1, %Q2, %R3\n\t" \ + "mov %R0, #0\n\t" \ + "adds %Q0, %1, %Q0\n\t" \ + "adc %R0, %R0, #0\n\t" \ + "umlal %Q0, %R0, %R2, %R3" \ + : "+&r" (__res), "+&r" (__z) \ + : "r" (__m), "r" (__n) \ + : "cc" ); \ + } \ + __res /= __p; \ + /* The reminder can be computed with 32-bit regs */ \ + /* only, and gcc is good at that. */ \ + { \ + unsigned int __res0 = __res; \ + unsigned int __b0 = __b; \ + __r -= __res0 * __b0; \ + } \ + /* BUG_ON(__r >= __b || __res * __b + __r != n); */ \ + n = __res; \ + } \ + __r; \ }) /* our own fls implementation to make sure constant propagation is fine */ -#define __div64_fls(bits) \ -({ \ - unsigned int __left = (bits), __nr = 0; \ - if (__left & 0xffff0000) __nr += 16, __left >>= 16; \ - if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \ - if (__left & 0x000000f0) __nr += 4, __left >>= 4; \ - if (__left & 0x0000000c) __nr += 2, __left >>= 2; \ - if (__left & 0x00000002) __nr += 1; \ - __nr; \ +#define __div64_fls(bits) \ +({ \ + unsigned int __left = (bits), __nr = 0; \ + if (__left & 0xffff0000) __nr += 16, __left >>= 16; \ + if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \ + if (__left & 0x000000f0) __nr += 4, __left >>= 4; \ + if (__left & 0x0000000c) __nr += 2, __left >>= 2; \ + if (__left & 0x00000002) __nr += 1; \ + __nr; \ }) #endif diff -r f3d137e3e6c7 -r 0ba87b95e80b xen/include/asm-arm/numa.h --- a/xen/include/asm-arm/numa.h Mon Feb 13 18:17:28 2012 +0000 +++ b/xen/include/asm-arm/numa.h Mon Feb 13 17:26:08 2012 +0000 @@ -3,7 +3,7 @@ /* Fake one node for now... 
 */ #define cpu_to_node(cpu) 0 -#define node_to_cpumask(node) (cpu_online_map) +#define node_to_cpumask(node) (cpu_online_map) static inline __attribute__((pure)) int phys_to_nid(paddr_t addr) {
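For readers tracing the do_div() hunks in xen/include/asm-arm/div64.h above: the macro's observable contract is exactly the reference routine quoted in the patched comment — divide a 64-bit dividend in place by a 32-bit divisor and hand back the 32-bit remainder. A minimal stand-alone C sketch of that contract (illustrative only, not part of the patch; the name ref_do_div and the test harness are hypothetical):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Reference semantics of the do_div() macro: divide the 64-bit
 * value *n in place by a 32-bit base and return the remainder.
 * Every fast path in div64.h must agree with this for all inputs. */
static uint32_t ref_do_div(uint64_t *n, uint32_t base)
{
    uint32_t remainder = (uint32_t)(*n % base);
    *n /= base;
    return remainder;
}

int main(void)
{
    uint64_t n = 0x123456789abcdefULL;
    uint32_t r = ref_do_div(&n, 1000000000u); /* e.g. ns -> s split */
    printf("quotient=%llu remainder=%u\n", (unsigned long long)n, r);
    assert(n * 1000000000ULL + r == 0x123456789abcdefULL);
    return 0;
}
```

The three paths the macro selects between — the __do_div64 call for non-constant divisors, the power-of-two shortcut, and the multiply-by-reciprocal path — all have to reproduce this routine exactly, which is what the commented-out BUG_ON() near the end of the macro asserts.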
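Likewise, the ARM_DIV_BODY macro patched in xen/arch/arm/lib/lib1funcs.S is, at heart, classic shift-and-subtract (restoring) division: left-align the divisor with the dividend, then step back down one bit at a time, subtracting wherever the divisor still fits. The asm refines this by unrolling the loop four bit positions per iteration and, on ARMv5 and later, using clz to do the alignment in constant time. A compact, unoptimised C model of the underlying algorithm (a sketch of the idea only, not a drop-in equivalent of the macro):

```c
#include <stdint.h>

/* Shift-and-subtract division in the spirit of ARM_DIV_BODY:
 * left-align the divisor with the dividend, then walk back down,
 * subtracting at every bit position where the divisor still fits. */
static uint32_t shift_sub_udiv(uint32_t dividend, uint32_t divisor)
{
    uint32_t curbit = 1, result = 0;

    if (divisor == 0)
        return 0; /* the real entry points branch to Ldiv0 first */

    /* Alignment: shift the divisor up until it is >= the dividend,
     * stopping before the top bit would be shifted out. */
    while (divisor < dividend && !(divisor & 0x80000000u)) {
        divisor <<= 1;
        curbit <<= 1;
    }

    /* Division loop: try each bit position in turn. */
    while (curbit) {
        if (dividend >= divisor) {
            dividend -= divisor;
            result |= curbit;
        }
        divisor >>= 1;
        curbit >>= 1;
    }
    return result; /* the remainder is what is left in 'dividend' */
}
```

ARM_MOD_BODY in the same file runs the same subtract loop but keeps only the running dividend, which is why __umodsi3 and __modsi3 share most of their structure with the divide entry points.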