
[Xen-changelog] [xen-unstable] xen: arm: introduce arm32 as a subarch of arm.


  • To: xen-changelog@xxxxxxxxxxxxxxxxxxx
  • From: Xen patchbot-unstable <patchbot@xxxxxxx>
  • Date: Thu, 20 Dec 2012 16:11:18 +0000
  • Delivery-date: Thu, 20 Dec 2012 16:12:01 +0000
  • List-id: "Change log for Mercurial (receive only)" <xen-changelog.lists.xen.org>

# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxx>
# Date 1355926590 0
# Node ID b094ba4bf9853e408a710664cb2995b4737cf165
# Parent  984086ca8ca0de17b5cd3253bc9579d072ec43bc
xen: arm: introduce arm32 as a subarch of arm.

- move 32-bit specific files into subarch specific arm32 subdirectory.
- move gic.h to xen/include/asm-arm (it is needed from both subarch
  and generic code).
- make the appropriate build and config file changes to support
  XEN_TARGET_ARCH=arm32.

This prepares us for an eventual 64-bit subarch.

Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Acked-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
Committed-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
---


diff -r 984086ca8ca0 -r b094ba4bf985 Config.mk
--- a/Config.mk Wed Dec 19 14:16:29 2012 +0000
+++ b/Config.mk Wed Dec 19 14:16:30 2012 +0000
@@ -14,7 +14,9 @@ debug ?= y
 debug_symbols ?= $(debug)
 
 XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
-                         -e s/i86pc/x86_32/ -e s/amd64/x86_64/ -e s/arm.*/arm/)
+                         -e s/i86pc/x86_32/ -e s/amd64/x86_64/ \
+                         -e s/armv7.*/arm32/)
+
 XEN_TARGET_ARCH     ?= $(XEN_COMPILE_ARCH)
 XEN_OS              ?= $(shell uname -s)
 
diff -r 984086ca8ca0 -r b094ba4bf985 config/arm.mk
--- a/config/arm.mk     Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-CONFIG_ARM := y
-CONFIG_ARM_32 := y
-CONFIG_ARM_$(XEN_OS) := y
-
-# -march= -mcpu=
-
-# Explicitly specify 32-bit ARM ISA since toolchain default can be -mthumb:
-CFLAGS += -marm
-
-HAS_PL011 := y
-
-# Use only if calling $(LD) directly.
-#LDFLAGS_DIRECT_OpenBSD = _obsd
-#LDFLAGS_DIRECT_FreeBSD = _fbsd
-LDFLAGS_DIRECT_Linux = _linux
-LDFLAGS_DIRECT += -marmelf$(LDFLAGS_DIRECT_$(XEN_OS))_eabi
-
-CONFIG_LOAD_ADDRESS ?= 0x80000000
diff -r 984086ca8ca0 -r b094ba4bf985 config/arm32.mk
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/config/arm32.mk   Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,18 @@
+CONFIG_ARM := y
+CONFIG_ARM_32 := y
+CONFIG_ARM_$(XEN_OS) := y
+
+# -march= -mcpu=
+
+# Explicitly specify 32-bit ARM ISA since toolchain default can be -mthumb:
+CFLAGS += -marm
+
+HAS_PL011 := y
+
+# Use only if calling $(LD) directly.
+#LDFLAGS_DIRECT_OpenBSD = _obsd
+#LDFLAGS_DIRECT_FreeBSD = _fbsd
+LDFLAGS_DIRECT_Linux = _linux
+LDFLAGS_DIRECT += -marmelf$(LDFLAGS_DIRECT_$(XEN_OS))_eabi
+
+CONFIG_LOAD_ADDRESS ?= 0x80000000
diff -r 984086ca8ca0 -r b094ba4bf985 xen/Rules.mk
--- a/xen/Rules.mk      Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/Rules.mk      Wed Dec 19 14:16:30 2012 +0000
@@ -28,7 +28,7 @@ endif
 # Set ARCH/SUBARCH appropriately.
 override TARGET_SUBARCH  := $(XEN_TARGET_ARCH)
 override TARGET_ARCH     := $(shell echo $(XEN_TARGET_ARCH) | \
-                              sed -e 's/x86.*/x86/')
+                              sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g')
 
 TARGET := $(BASEDIR)/xen
 
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/Makefile
--- a/xen/arch/arm/Makefile     Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/Makefile     Wed Dec 19 14:16:30 2012 +0000
@@ -1,8 +1,7 @@
-subdir-y += lib
+subdir-$(arm32) += arm32
 
 obj-y += dummy.o
 obj-y += early_printk.o
-obj-y += entry.o
 obj-y += domain.o
 obj-y += domctl.o
 obj-y += sysctl.o
@@ -12,8 +11,6 @@ obj-y += io.o
 obj-y += irq.o
 obj-y += kernel.o
 obj-y += mm.o
-obj-y += mode_switch.o
-obj-y += proc-ca15.o
 obj-y += p2m.o
 obj-y += percpu.o
 obj-y += guestcopy.o
@@ -36,7 +33,7 @@ obj-y += dtb.o
 AFLAGS += -DCONFIG_DTB_FILE=\"$(CONFIG_DTB_FILE)\"
 endif
 
-ALL_OBJS := head.o $(ALL_OBJS)
+ALL_OBJS := $(TARGET_SUBARCH)/head.o $(ALL_OBJS)
 
 $(TARGET): $(TARGET)-syms $(TARGET).bin
        # XXX: VE model loads by VMA so instead of
@@ -81,7 +78,7 @@ endif
            $(@D)/.$(@F).1.o -o $@
        rm -f $(@D)/.$(@F).[0-9]*
 
-asm-offsets.s: asm-offsets.c
+asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c
        $(CC) $(filter-out -flto,$(CFLAGS)) -S -o $@ $<
 
 xen.lds: xen.lds.S
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/Rules.mk
--- a/xen/arch/arm/Rules.mk     Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/Rules.mk     Wed Dec 19 14:16:30 2012 +0000
@@ -12,16 +12,19 @@ CFLAGS += -fno-builtin -fno-common -Wred
 CFLAGS += -iwithprefix include -Werror -Wno-pointer-arith -pipe
 CFLAGS += -I$(BASEDIR)/include
 
-# Prevent floating-point variables from creeping into Xen.
-CFLAGS += -msoft-float
-
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 
 arm := y
 
+ifeq ($(TARGET_SUBARCH),arm32)
+# Prevent floating-point variables from creeping into Xen.
+CFLAGS += -msoft-float
+CFLAGS += -mcpu=cortex-a15 -mfpu=vfpv3 -mfloat-abi=softfp
+arm32 := y
+arm64 := n
+endif
+
 ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
 CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
 endif
-
-CFLAGS += -mcpu=cortex-a15 -mfpu=vfpv3 -mfloat-abi=softfp
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/Makefile       Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,5 @@
+subdir-y += lib
+
+obj-y += entry.o
+obj-y += mode_switch.o
+obj-y += proc-ca15.o
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/asm-offsets.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/asm-offsets.c  Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,80 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <public/xen.h>
+#include <asm/current.h>
+
+#define DEFINE(_sym, _val) \
+    __asm__ __volatile__ ( "\n->" #_sym " %0 " #_val : : "i" (_val) )
+#define BLANK() \
+    __asm__ __volatile__ ( "\n->" : : )
+#define OFFSET(_sym, _str, _mem) \
+    DEFINE(_sym, offsetof(_str, _mem));
+
+/* base-2 logarithm */
+#define __L2(_x)  (((_x) & 0x00000002) ?   1 : 0)
+#define __L4(_x)  (((_x) & 0x0000000c) ? ( 2 + __L2( (_x)>> 2)) : __L2( _x))
+#define __L8(_x)  (((_x) & 0x000000f0) ? ( 4 + __L4( (_x)>> 4)) : __L4( _x))
+#define __L16(_x) (((_x) & 0x0000ff00) ? ( 8 + __L8( (_x)>> 8)) : __L8( _x))
+#define LOG_2(_x) (((_x) & 0xffff0000) ? (16 + __L16((_x)>>16)) : __L16(_x))
+
+void __dummy__(void)
+{
+   OFFSET(UREGS_sp, struct cpu_user_regs, sp);
+   OFFSET(UREGS_lr, struct cpu_user_regs, lr);
+   OFFSET(UREGS_pc, struct cpu_user_regs, pc);
+   OFFSET(UREGS_cpsr, struct cpu_user_regs, cpsr);
+
+   OFFSET(UREGS_LR_usr, struct cpu_user_regs, lr_usr);
+   OFFSET(UREGS_SP_usr, struct cpu_user_regs, sp_usr);
+
+   OFFSET(UREGS_SP_svc, struct cpu_user_regs, sp_svc);
+   OFFSET(UREGS_LR_svc, struct cpu_user_regs, lr_svc);
+   OFFSET(UREGS_SPSR_svc, struct cpu_user_regs, spsr_svc);
+
+   OFFSET(UREGS_SP_abt, struct cpu_user_regs, sp_abt);
+   OFFSET(UREGS_LR_abt, struct cpu_user_regs, lr_abt);
+   OFFSET(UREGS_SPSR_abt, struct cpu_user_regs, spsr_abt);
+
+   OFFSET(UREGS_SP_und, struct cpu_user_regs, sp_und);
+   OFFSET(UREGS_LR_und, struct cpu_user_regs, lr_und);
+   OFFSET(UREGS_SPSR_und, struct cpu_user_regs, spsr_und);
+
+   OFFSET(UREGS_SP_irq, struct cpu_user_regs, sp_irq);
+   OFFSET(UREGS_LR_irq, struct cpu_user_regs, lr_irq);
+   OFFSET(UREGS_SPSR_irq, struct cpu_user_regs, spsr_irq);
+
+   OFFSET(UREGS_SP_fiq, struct cpu_user_regs, sp_fiq);
+   OFFSET(UREGS_LR_fiq, struct cpu_user_regs, lr_fiq);
+   OFFSET(UREGS_SPSR_fiq, struct cpu_user_regs, spsr_fiq);
+
+   OFFSET(UREGS_R8_fiq, struct cpu_user_regs, r8_fiq);
+   OFFSET(UREGS_R9_fiq, struct cpu_user_regs, r9_fiq);
+   OFFSET(UREGS_R10_fiq, struct cpu_user_regs, r10_fiq);
+   OFFSET(UREGS_R11_fiq, struct cpu_user_regs, r11_fiq);
+   OFFSET(UREGS_R12_fiq, struct cpu_user_regs, r12_fiq);
+
+   OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, cpsr);
+   DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs));
+   BLANK();
+
+   DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+
+   OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
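
The asm-offsets.c file above never runs: the Makefile rule earlier in this patch ("asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c") only compiles it to assembly, and the build then post-processes the "->SYMBOL value" marker lines that DEFINE()/OFFSET() smuggle into the compiler output, turning them into a header of #defines that assembly code such as entry.S can share with C. A minimal, self-contained sketch of the same technique, using a hypothetical struct and names:

/* Hypothetical struct and symbol names; compile with "cc -S" and look
 * at the generated .s file: each DEFINE() emits a marker line such as
 *     ->REGS_sp 4 offsetof(struct regs, sp)
 * which a sed/awk pass can rewrite into "#define REGS_sp 4". */
#include <stddef.h>

struct regs { unsigned int r0, sp, lr, pc; };

#define DEFINE(sym, val) \
    __asm__ __volatile__ ( "\n->" #sym " %0 " #val : : "i" (val) )
#define OFFSET(sym, str, mem) DEFINE(sym, offsetof(str, mem))

void __dummy__(void)
{
    OFFSET(REGS_sp, struct regs, sp);         /* -> REGS_sp 4 ...      */
    DEFINE(REGS_sizeof, sizeof(struct regs)); /* -> REGS_sizeof 16 ... */
}

The LOG_2() helpers at the top of the file compute a compile-time base-2 logarithm by testing successively narrower bit ranges; LOG_2(0x1000), for example, evaluates to 12.
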
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/entry.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/entry.S        Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,141 @@
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+#include <public/xen.h>
+
+#define SAVE_ONE_BANKED(reg)    mrs r11, reg; str r11, [sp, #UREGS_##reg]
+#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11
+
+#define SAVE_BANKED(mode) \
+        SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode)
+
+#define RESTORE_BANKED(mode) \
+        RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode)
+
+#define SAVE_ALL                                                        \
+        sub sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */      \
+        push {r0-r12}; /* Save R0-R12 */                                \
+                                                                        \
+        mrs r11, ELR_hyp;               /* ELR_hyp is return address. */\
+        str r11, [sp, #UREGS_pc];                                       \
+                                                                        \
+        str lr, [sp, #UREGS_lr];                                        \
+                                                                        \
+        add r11, sp, #UREGS_kernel_sizeof+4;                            \
+        str r11, [sp, #UREGS_sp];                                       \
+                                                                        \
+        mrs r11, SPSR_hyp;                                              \
+        str r11, [sp, #UREGS_cpsr];                                     \
+        and r11, #PSR_MODE_MASK;                                        \
+        cmp r11, #PSR_MODE_HYP;                                         \
+        blne save_guest_regs
+
+save_guest_regs:
+        ldr r11, =0xffffffff  /* Clobber SP which is only valid for hypervisor frames. */
+        str r11, [sp, #UREGS_sp]
+        SAVE_ONE_BANKED(SP_usr)
+        /* LR_usr is the same physical register as lr and is saved in SAVE_ALL */
+        SAVE_BANKED(svc)
+        SAVE_BANKED(abt)
+        SAVE_BANKED(und)
+        SAVE_BANKED(irq)
+        SAVE_BANKED(fiq)
+        SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq)
+        SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq);
+        mov pc, lr
+
+#define DEFINE_TRAP_ENTRY(trap)                                         \
+        ALIGN;                                                          \
+trap_##trap:                                                            \
+        SAVE_ALL;                                                       \
+        cpsie i;        /* local_irq_enable */                          \
+        adr lr, return_from_trap;                                       \
+        mov r0, sp;                                                     \
+        mov r11, sp;                                                    \
+        bic sp, #7; /* Align the stack pointer (noop on guest trap) */  \
+        b do_trap_##trap
+
+#define DEFINE_TRAP_ENTRY_NOIRQ(trap)                                   \
+        ALIGN;                                                          \
+trap_##trap:                                                            \
+        SAVE_ALL;                                                       \
+        adr lr, return_from_trap;                                       \
+        mov r0, sp;                                                     \
+        mov r11, sp;                                                    \
+        bic sp, #7; /* Align the stack pointer (noop on guest trap) */  \
+        b do_trap_##trap
+
+.globl hyp_traps_vector
+        .align 5
+hyp_traps_vector:
+        .word 0                         /* 0x00 - Reset */
+        b trap_undefined_instruction    /* 0x04 - Undefined Instruction */
+        b trap_supervisor_call          /* 0x08 - Supervisor Call */
+        b trap_prefetch_abort           /* 0x0c - Prefetch Abort */
+        b trap_data_abort               /* 0x10 - Data Abort */
+        b trap_hypervisor               /* 0x14 - Hypervisor */
+        b trap_irq                      /* 0x18 - IRQ */
+        b trap_fiq                      /* 0x1c - FIQ */
+
+DEFINE_TRAP_ENTRY(undefined_instruction)
+DEFINE_TRAP_ENTRY(supervisor_call)
+DEFINE_TRAP_ENTRY(prefetch_abort)
+DEFINE_TRAP_ENTRY(data_abort)
+DEFINE_TRAP_ENTRY(hypervisor)
+DEFINE_TRAP_ENTRY_NOIRQ(irq)
+DEFINE_TRAP_ENTRY_NOIRQ(fiq)
+
+return_from_trap:
+        mov sp, r11
+ENTRY(return_to_new_vcpu)
+        ldr r11, [sp, #UREGS_cpsr]
+        and r11, #PSR_MODE_MASK
+        cmp r11, #PSR_MODE_HYP
+        beq return_to_hypervisor
+        /* Fall thru */
+ENTRY(return_to_guest)
+        mov r11, sp
+        bic sp, #7 /* Align the stack pointer */
+        bl leave_hypervisor_tail /* Disables interrupts on return */
+        mov sp, r11
+        RESTORE_ONE_BANKED(SP_usr)
+        /* LR_usr is the same physical register as lr and is restored below */
+        RESTORE_BANKED(svc)
+        RESTORE_BANKED(abt)
+        RESTORE_BANKED(und)
+        RESTORE_BANKED(irq)
+        RESTORE_BANKED(fiq)
+        RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq)
+        RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
+        /* Fall thru */
+ENTRY(return_to_hypervisor)
+        cpsid i
+        ldr lr, [sp, #UREGS_lr]
+        ldr r11, [sp, #UREGS_pc]
+        msr ELR_hyp, r11
+        ldr r11, [sp, #UREGS_cpsr]
+        msr SPSR_hyp, r11
+        pop {r0-r12}
+        add sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */
+        eret
+
+/*
+ * struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next)
+ *
+ * r0 - prev
+ * r1 - next
+ *
+ * Returns prev in r0
+ */
+ENTRY(__context_switch)
+        add     ip, r0, #VCPU_arch_saved_context
+        stmia   ip!, {r4 - sl, fp, sp, lr}      /* Save register state */
+
+        add     r4, r1, #VCPU_arch_saved_context
+        ldmia   r4, {r4 - sl, fp, sp, pc}       /* Load registers and return */
+
+/*
+ * Local variables:
+ * mode: ASM
+ * indent-tabs-mode: nil
+ * End:
+ */
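
SAVE_ALL and return_to_guest above walk a register frame whose layout is fixed by the UREGS_* constants generated from asm-offsets.c: r0-r12, sp, lr, pc and cpsr form the hypervisor ("kernel") part of the frame, and the banked guest-mode registers follow. A rough C picture of that layout, inferred from the offsets and comments above rather than copied from Xen's headers (padding and the exact declarations may differ):

/* Illustrative sketch only; the authoritative struct cpu_user_regs
 * lives in Xen's ARM headers. */
#include <stdint.h>

struct cpu_user_regs_sketch {
    uint32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12;
    uint32_t sp;       /* valid for hypervisor frames, clobbered for guests */
    uint32_t lr;       /* same physical register as LR_usr */
    uint32_t pc;       /* return address, from ELR_hyp */
    uint32_t cpsr;     /* return mode, from SPSR_hyp */
    /* UREGS_kernel_sizeof is the offset of cpsr, so a trap taken in
     * hypervisor mode saves only up to here (the "+4" in SAVE_ALL
     * accounts for the cpsr word itself). Guest traps continue: */
    uint32_t sp_usr;                          /* lr_usr aliases lr above */
    uint32_t sp_svc, lr_svc, spsr_svc;
    uint32_t sp_abt, lr_abt, spsr_abt;
    uint32_t sp_und, lr_und, spsr_und;
    uint32_t sp_irq, lr_irq, spsr_irq;
    uint32_t sp_fiq, lr_fiq, spsr_fiq;
    uint32_t r8_fiq, r9_fiq, r10_fiq, r11_fiq, r12_fiq;
    /* UREGS_user_sizeof is the size of the whole structure. */
};
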
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/head.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/head.S Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,415 @@
+/*
+ * xen/arch/arm/head.S
+ *
+ * Start-of-day code for an ARMv7-A with virt extensions.
+ *
+ * Tim Deegan <tim@xxxxxxx>
+ * Copyright (c) 2011 Citrix Systems.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <asm/config.h>
+#include <asm/page.h>
+#include <asm/processor-ca15.h>
+#include <asm/asm_defns.h>
+
+#define ZIMAGE_MAGIC_NUMBER 0x016f2818
+
+#define PT_PT  0xe7f /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=1, P=1 */
+#define PT_MEM 0xe7d /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=0, P=1 */
+#define PT_DEV 0xe71 /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=0, P=1 */
+#define PT_DEV_L3 0xe73 /* lev3: nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=1, P=1 */
+
+#define PT_UPPER(x) (PT_##x & 0xf00)
+#define PT_LOWER(x) (PT_##x & 0x0ff)
+
+/* Macro to print a string to the UART, if there is one.
+ * Clobbers r0-r3. */
+#ifdef EARLY_UART_ADDRESS
+#define PRINT(_s)       \
+        adr   r0, 98f ; \
+        bl    puts    ; \
+        b     99f     ; \
+98:     .asciz _s     ; \
+        .align 2      ; \
+99:
+#else
+#define PRINT(s)
+#endif
+
+        .arm
+
+        /* This must be the very first address in the loaded image.
+         * It should be linked at XEN_VIRT_START, and loaded at any
+         * 2MB-aligned address.  All of text+data+bss must fit in 2MB,
+         * or the initial pagetable code below will need adjustment. */
+        .global start
+start:
+
+        /* zImage magic header, see:
+         * http://www.simtec.co.uk/products/SWLINUX/files/booting_article.html#d0e309
+         */
+        .rept 8
+        mov   r0, r0
+        .endr
+        b     past_zImage
+
+        .word ZIMAGE_MAGIC_NUMBER    /* Magic numbers to help the loader */
+        .word 0x00000000             /* absolute load/run zImage address or
+                                      * 0 for PiC */
+        .word (_end - start)         /* zImage end address */
+
+past_zImage:
+        cpsid aif                    /* Disable all interrupts */
+
+        /* Save the bootloader arguments in less-clobberable registers */
+        mov   r7, r1                 /* r7 := ARM-linux machine type */
+        mov   r8, r2                 /* r8 := ATAG base address */
+
+        /* Find out where we are */
+        ldr   r0, =start
+        adr   r9, start              /* r9  := paddr (start) */
+        sub   r10, r9, r0            /* r10 := phys-offset */
+
+        /* Using the DTB in the .dtb section? */
+#ifdef CONFIG_DTB_FILE
+        ldr   r8, =_sdtb
+        add   r8, r10                /* r8 := paddr(DTB) */
+#endif
+
+        /* Are we the boot CPU? */
+        mov   r12, #0                /* r12 := CPU ID */
+        mrc   CP32(r0, MPIDR)
+        tst   r0, #(1<<31)           /* Multiprocessor extension supported? */
+        beq   boot_cpu
+        tst   r0, #(1<<30)           /* Uniprocessor system? */
+        bne   boot_cpu
+        bics  r12, r0, #(0xff << 24) /* Mask out flags to get CPU ID */
+        beq   boot_cpu               /* If we're CPU 0, boot now */
+
+        /* Non-boot CPUs wait here to be woken up one at a time. */
+1:      dsb
+        ldr   r0, =smp_up_cpu        /* VA of gate */
+        add   r0, r0, r10            /* PA of gate */
+        ldr   r1, [r0]               /* Which CPU is being booted? */
+        teq   r1, r12                /* Is it us? */
+        wfene
+        bne   1b
+
+boot_cpu:
+#ifdef EARLY_UART_ADDRESS
+        ldr   r11, =EARLY_UART_ADDRESS  /* r11 := UART base address */
+        teq   r12, #0                   /* CPU 0 sets up the UART too */
+        bleq  init_uart
+        PRINT("- CPU ")
+        mov   r0, r12
+        bl    putn
+        PRINT(" booting -\r\n")
+#endif
+
+        /* Wake up secondary cpus */
+        teq   r12, #0
+        bleq  kick_cpus
+
+        /* Check that this CPU has Hyp mode */
+        mrc   CP32(r0, ID_PFR1)
+        and   r0, r0, #0xf000        /* Bits 12-15 define virt extensions */
+        teq   r0, #0x1000            /* Must == 0x1 or may be incompatible */
+        beq   1f
+        PRINT("- CPU doesn't support the virtualization extensions -\r\n")
+        b     fail
+1:
+        /* Check if we're already in it */
+        mrs   r0, cpsr
+        and   r0, r0, #0x1f          /* Mode is in the low 5 bits of CPSR */
+        teq   r0, #0x1a              /* Hyp Mode? */
+        bne   1f
+        PRINT("- Started in Hyp mode -\r\n")
+        b     hyp
+1:
+        /* Otherwise, it must have been Secure Supervisor mode */
+        mrc   CP32(r0, SCR)
+        tst   r0, #0x1               /* Not-Secure bit set? */
+        beq   1f
+        PRINT("- CPU is not in Hyp mode or Secure state -\r\n")
+        b     fail
+1:
+        /* OK, we're in Secure state. */
+        PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n")
+        ldr   r0, =enter_hyp_mode    /* VA of function */
+        adr   lr, hyp                /* Set return address for call */
+        add   pc, r0, r10            /* Call PA of function */
+
+hyp:
+
+        /* Zero BSS On the boot CPU to avoid nasty surprises */
+        teq   r12, #0
+        bne   skip_bss
+
+        PRINT("- Zero BSS -\r\n")
+        ldr   r0, =__bss_start       /* Load start & end of bss */
+        ldr   r1, =__bss_end
+        add   r0, r0, r10            /* Apply physical offset */
+        add   r1, r1, r10
+
+        mov   r2, #0
+1:      str   r2, [r0], #4
+        cmp   r0, r1
+        blo   1b
+
+skip_bss:
+
+        PRINT("- Setting up control registers -\r\n")
+
+        /* Read CPU ID */
+        mrc   CP32(r0, MIDR)
+        ldr   r1, =(MIDR_MASK)
+        and   r0, r0, r1
+        /* Is this a Cortex A15? */
+        ldr   r1, =(CORTEX_A15_ID)
+        teq   r0, r1
+        bleq  cortex_a15_init
+
+        /* Set up memory attribute type tables */
+        ldr   r0, =MAIR0VAL
+        ldr   r1, =MAIR1VAL
+        mcr   CP32(r0, MAIR0)
+        mcr   CP32(r1, MAIR1)
+        mcr   CP32(r0, HMAIR0)
+        mcr   CP32(r1, HMAIR1)
+
+        /* Set up the HTCR:
+         * PT walks use Outer-Shareable accesses,
+         * PT walks are write-back, no-write-allocate in both cache levels,
+         * Full 32-bit address space goes through this table. */
+        ldr   r0, =0x80002500
+        mcr   CP32(r0, HTCR)
+
+        /* Set up the HSCTLR:
+         * Exceptions in LE ARM,
+         * Low-latency IRQs disabled,
+         * Write-implies-XN disabled (for now),
+         * D-cache disabled (for now),
+         * I-cache enabled,
+         * Alignment checking enabled,
+         * MMU translation disabled (for now). */
+        ldr   r0, =(HSCTLR_BASE|SCTLR_A)
+        mcr   CP32(r0, HSCTLR)
+
+        /* Write Xen's PT's paddr into the HTTBR */
+        ldr   r4, =xen_pgtable
+        add   r4, r4, r10            /* r4 := paddr (xen_pagetable) */
+        mov   r5, #0                 /* r4:r5 is paddr (xen_pagetable) */
+        mcrr  CP64(r4, r5, HTTBR)
+
+        /* Non-boot CPUs don't need to rebuild the pagetable */
+        teq   r12, #0
+        bne   pt_ready
+
+        /* console fixmap */
+#ifdef EARLY_UART_ADDRESS
+        ldr   r1, =xen_fixmap
+        add   r1, r1, r10            /* r1 := paddr (xen_fixmap) */
+        mov   r3, #0
+        lsr   r2, r11, #12
+        lsl   r2, r2, #12            /* 4K aligned paddr of UART */
+        orr   r2, r2, #PT_UPPER(DEV_L3)
+        orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */
+        strd  r2, r3, [r1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */
+#endif
+
+        /* Build the baseline idle pagetable's first-level entries */
+        ldr   r1, =xen_second
+        add   r1, r1, r10            /* r1 := paddr (xen_second) */
+        mov   r3, #0x0
+        orr   r2, r1, #PT_UPPER(PT)  /* r2:r3 := table map of xen_second */
+        orr   r2, r2, #PT_LOWER(PT)  /* (+ rights for linear PT) */
+        strd  r2, r3, [r4, #0]       /* Map it in slot 0 */
+        add   r2, r2, #0x1000
+        strd  r2, r3, [r4, #8]       /* Map 2nd page in slot 1 */
+        add   r2, r2, #0x1000
+        strd  r2, r3, [r4, #16]      /* Map 3rd page in slot 2 */
+        add   r2, r2, #0x1000
+        strd  r2, r3, [r4, #24]      /* Map 4th page in slot 3 */
+
+        /* Now set up the second-level entries */
+        orr   r2, r9, #PT_UPPER(MEM)
+        orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB normal map of Xen */
+        mov   r4, r9, lsr #18        /* Slot for paddr(start) */
+        strd  r2, r3, [r1, r4]       /* Map Xen there */
+        ldr   r4, =start
+        lsr   r4, #18                /* Slot for vaddr(start) */
+        strd  r2, r3, [r1, r4]       /* Map Xen there too */
+
+        /* xen_fixmap pagetable */
+        ldr   r2, =xen_fixmap
+        add   r2, r2, r10            /* r2 := paddr (xen_fixmap) */
+        orr   r2, r2, #PT_UPPER(PT)
+        orr   r2, r2, #PT_LOWER(PT)  /* r2:r3 := table map of xen_fixmap */
+        add   r4, r4, #8
+        strd  r2, r3, [r1, r4]       /* Map it in the fixmap's slot */
+
+        mov   r3, #0x0
+        lsr   r2, r8, #21
+        lsl   r2, r2, #21            /* 2MB-aligned paddr of DTB */
+        orr   r2, r2, #PT_UPPER(MEM)
+        orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB RAM incl. DTB */
+        add   r4, r4, #8
+        strd  r2, r3, [r1, r4]       /* Map it in the early boot slot */
+
+pt_ready:
+        PRINT("- Turning on paging -\r\n")
+
+        ldr   r1, =paging            /* Explicit vaddr, not RIP-relative */
+        mrc   CP32(r0, HSCTLR)
+        orr   r0, r0, #(SCTLR_M|SCTLR_C) /* Enable MMU and D-cache */
+        dsb                          /* Flush PTE writes and finish reads */
+        mcr   CP32(r0, HSCTLR)       /* now paging is enabled */
+        isb                          /* Now, flush the icache */
+        mov   pc, r1                 /* Get a proper vaddr into PC */
+paging:
+
+
+#ifdef EARLY_UART_ADDRESS
+        /* Use a virtual address to access the UART. */
+        ldr   r11, =FIXMAP_ADDR(FIXMAP_CONSOLE)
+#endif
+
+        PRINT("- Ready -\r\n")
+
+        /* The boot CPU should go straight into C now */
+        teq   r12, #0
+        beq   launch
+
+        /* Non-boot CPUs need to move on to the relocated pagetables */
+        mov   r0, #0
+        ldr   r4, =boot_httbr        /* VA of HTTBR value stashed by CPU 0 */
+        add   r4, r4, r10            /* PA of it */
+        ldrd  r4, r5, [r4]           /* Actual value */
+        dsb
+        mcrr  CP64(r4, r5, HTTBR)
+        dsb
+        isb
+        mcr   CP32(r0, TLBIALLH)     /* Flush hypervisor TLB */
+        mcr   CP32(r0, ICIALLU)      /* Flush I-cache */
+        mcr   CP32(r0, BPIALL)       /* Flush branch predictor */
+        dsb                          /* Ensure completion of TLB+BP flush */
+        isb
+
+        /* Non-boot CPUs report that they've got this far */
+        ldr   r0, =ready_cpus
+1:      ldrex r1, [r0]               /*            { read # of ready CPUs } */
+        add   r1, r1, #1             /* Atomically { ++                   } */
+        strex r2, r1, [r0]           /*            { writeback            } */
+        teq   r2, #0
+        bne   1b
+        dsb
+        mcr   CP32(r0, DCCMVAC)      /* flush D-Cache */
+        dsb
+
+        /* Here, the non-boot CPUs must wait again -- they're now running on
+         * the boot CPU's pagetables so it's safe for the boot CPU to
+         * overwrite the non-relocated copy of Xen.  Once it's done that,
+         * and brought up the memory allocator, non-boot CPUs can get their
+         * own stacks and enter C. */
+1:      wfe
+        dsb
+        ldr   r0, =smp_up_cpu
+        ldr   r1, [r0]               /* Which CPU is being booted? */
+        teq   r1, r12                /* Is it us? */
+        bne   1b
+
+launch:
+        ldr   r0, =init_stack        /* Find the boot-time stack */
+        ldr   sp, [r0]
+        add   sp, #STACK_SIZE        /* (which grows down from the top). */
+        sub   sp, #CPUINFO_sizeof    /* Make room for CPU save record */
+        mov   r0, r10                /* Marshal args: - phys_offset */
+        mov   r1, r7                 /*               - machine type */
+        mov   r2, r8                 /*               - ATAG address */
+        movs  r3, r12                /*               - CPU ID */
+        beq   start_xen              /* and disappear into the land of C */
+        b     start_secondary        /* (to the appropriate entry point) */
+
+/* Fail-stop
+ * r0: string explaining why */
+fail:   PRINT("- Boot failed -\r\n")
+1:      wfe
+        b     1b
+
+#ifdef EARLY_UART_ADDRESS
+
+/* Bring up the UART. Specific to the PL011 UART.
+ * Clobbers r0-r2 */
+init_uart:
+        mov   r1, #0x0
+        str   r1, [r11, #0x24]       /* -> UARTIBRD (Baud divisor fraction) */
+        mov   r1, #0x4               /* 7.3728MHz / 0x4 == 16 * 115200 */
+        str   r1, [r11, #0x24]       /* -> UARTIBRD (Baud divisor integer) */
+        mov   r1, #0x60              /* 8n1 */
+        str   r1, [r11, #0x24]       /* -> UARTLCR_H (Line control) */
+        ldr   r1, =0x00000301        /* RXE | TXE | UARTEN */
+        str   r1, [r11, #0x30]       /* -> UARTCR (Control Register) */
+        adr   r0, 1f
+        b     puts
+1:      .asciz "- UART enabled -\r\n"
+        .align 4
+
+/* Print early debug messages.  Specific to the PL011 UART.
+ * r0: Nul-terminated string to print.
+ * Clobbers r0-r2 */
+puts:
+        ldr   r2, [r11, #0x18]       /* <- UARTFR (Flag register) */
+        tst   r2, #0x8               /* Check BUSY bit */
+        bne   puts                   /* Wait for the UART to be ready */
+        ldrb  r2, [r0], #1           /* Load next char */
+        teq   r2, #0                 /* Exit on nul */
+        moveq pc, lr
+        str   r2, [r11]              /* -> UARTDR (Data Register) */
+        b     puts
+
+/* Print a 32-bit number in hex.  Specific to the PL011 UART.
+ * r0: Number to print.
+ * clobbers r0-r3 */
+putn:
+        adr   r1, hex
+        mov   r3, #8
+1:      ldr   r2, [r11, #0x18]       /* <- UARTFR (Flag register) */
+        tst   r2, #0x8               /* Check BUSY bit */
+        bne   1b                     /* Wait for the UART to be ready */
+        and   r2, r0, #0xf0000000    /* Mask off the top nybble */
+        ldrb  r2, [r1, r2, lsr #28]  /* Convert to a char */
+        str   r2, [r11]              /* -> UARTDR (Data Register) */
+        lsl   r0, #4                 /* Roll it through one nybble at a time */
+        subs  r3, r3, #1
+        bne   1b
+        mov   pc, lr
+
+hex:    .ascii "0123456789abcdef"
+        .align 2
+
+#else  /* EARLY_UART_ADDRESS */
+
+init_uart:
+.global early_puts
+early_puts:
+puts:
+putn:   mov   pc, lr
+
+#endif /* EARLY_UART_ADDRESS */
+
+/*
+ * Local variables:
+ * mode: ASM
+ * indent-tabs-mode: nil
+ * End:
+ */
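
The PT_* constants at the top of head.S are LPAE page-table attribute bits. They are split into PT_UPPER/PT_LOWER halves because an ARM "orr" immediate cannot encode the full 12-bit pattern in one instruction; each entry is then a 64-bit descriptor written with strd from the r2:r3 pair (r3, the high word, stays zero for 32-bit physical addresses). The construction in C terms, a sketch reusing the same constant:

#include <stdint.h>

#define PT_MEM 0xe7dULL /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=0, P=1 */

/* 64-bit LPAE descriptor for a 2MB block mapping, as built by
 * "orr r2, r9, #PT_UPPER(MEM); orr r2, r2, #PT_LOWER(MEM)". */
static inline uint64_t pt_mem_entry(uint64_t paddr_2mb_aligned)
{
    return paddr_2mb_aligned | PT_MEM;
}

/* Byte offset of an address's slot in xen_second: one 8-byte entry per
 * 2MB section, i.e. (addr >> 21) * 8. For the 2MB-aligned addresses
 * used above this equals addr >> 18, the "lsr #18" in the code. */
static inline uint32_t second_slot_offset(uint32_t addr)
{
    return (addr >> 21) * 8;
}
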
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/Makefile   Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,5 @@
+obj-y += memcpy.o memmove.o memset.o memzero.o
+obj-y += findbit.o setbit.o
+obj-y += setbit.o clearbit.o changebit.o
+obj-y += testsetbit.o testclearbit.o testchangebit.o
+obj-y += lib1funcs.o lshrdi3.o div64.o
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/assembler.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/assembler.h        Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,325 @@
+/* From Linux arch/arm/include/asm/assembler.h */
+/*
+ *  arch/arm/include/asm/assembler.h
+ *
+ *  Copyright (C) 1996-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains arm architecture specific defines
+ *  for the different processors.
+ *
+ *  Do not include any C declarations in this file - it is included by
+ *  assembler source.
+ */
+#ifndef __ASM_ASSEMBLER_H__
+#define __ASM_ASSEMBLER_H__
+
+#ifndef __ASSEMBLY__
+#error "Only include this from assembly code"
+#endif
+
+// No Thumb, hence:
+#define W(instr)        instr
+#define ARM(instr...)   instr
+#define THUMB(instr...)
+
+#ifdef CONFIG_ARM_UNWIND
+#define UNWIND(code...)         code
+#else
+#define UNWIND(code...)
+#endif
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define pull            lsr
+#define push            lsl
+#define get_byte_0      lsl #0
+#define get_byte_1     lsr #8
+#define get_byte_2     lsr #16
+#define get_byte_3     lsr #24
+#define put_byte_0      lsl #0
+#define put_byte_1     lsl #8
+#define put_byte_2     lsl #16
+#define put_byte_3     lsl #24
+#else
+#define pull            lsl
+#define push            lsr
+#define get_byte_0     lsr #24
+#define get_byte_1     lsr #16
+#define get_byte_2     lsr #8
+#define get_byte_3      lsl #0
+#define put_byte_0     lsl #24
+#define put_byte_1     lsl #16
+#define put_byte_2     lsl #8
+#define put_byte_3      lsl #0
+#endif
+
+/*
+ * Data preload for architectures that support it
+ */
+#if __LINUX_ARM_ARCH__ >= 5
+#define PLD(code...)   code
+#else
+#define PLD(code...)
+#endif
+
+/*
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory.  Experiments on StrongARM and
+ * XScale didn't show this a worthwhile thing to do when the cache is not
+ * set to write-allocate (this would need further testing on XScale when WA
+ * is used).
+ *
+ * On Feroceon there is much to gain however, regardless of cache mode.
+ */
+#ifdef CONFIG_CPU_FEROCEON
+#define CALGN(code...) code
+#else
+#define CALGN(code...)
+#endif
+
+/*
+ * Enable and disable interrupts
+ */
+#if __LINUX_ARM_ARCH__ >= 6
+       .macro  disable_irq_notrace
+       cpsid   i
+       .endm
+
+       .macro  enable_irq_notrace
+       cpsie   i
+       .endm
+#else
+       .macro  disable_irq_notrace
+       msr     cpsr_c, #PSR_I_BIT | SVC_MODE
+       .endm
+
+       .macro  enable_irq_notrace
+       msr     cpsr_c, #SVC_MODE
+       .endm
+#endif
+
+       .macro asm_trace_hardirqs_off
+#if defined(CONFIG_TRACE_IRQFLAGS)
+       stmdb   sp!, {r0-r3, ip, lr}
+       bl      trace_hardirqs_off
+       ldmia   sp!, {r0-r3, ip, lr}
+#endif
+       .endm
+
+       .macro asm_trace_hardirqs_on_cond, cond
+#if defined(CONFIG_TRACE_IRQFLAGS)
+       /*
+        * actually the registers should be pushed and pop'd conditionally, but
+        * after bl the flags are certainly clobbered
+        */
+       stmdb   sp!, {r0-r3, ip, lr}
+       bl\cond trace_hardirqs_on
+       ldmia   sp!, {r0-r3, ip, lr}
+#endif
+       .endm
+
+       .macro asm_trace_hardirqs_on
+       asm_trace_hardirqs_on_cond al
+       .endm
+
+       .macro disable_irq
+       disable_irq_notrace
+       asm_trace_hardirqs_off
+       .endm
+
+       .macro enable_irq
+       asm_trace_hardirqs_on
+       enable_irq_notrace
+       .endm
+/*
+ * Save the current IRQ state and disable IRQs.  Note that this macro
+ * assumes FIQs are enabled, and that the processor is in SVC mode.
+ */
+       .macro  save_and_disable_irqs, oldcpsr
+       mrs     \oldcpsr, cpsr
+       disable_irq
+       .endm
+
+/*
+ * Restore interrupt state previously stored in a register.  We don't
+ * guarantee that this will preserve the flags.
+ */
+       .macro  restore_irqs_notrace, oldcpsr
+       msr     cpsr_c, \oldcpsr
+       .endm
+
+       .macro restore_irqs, oldcpsr
+       tst     \oldcpsr, #PSR_I_BIT
+       asm_trace_hardirqs_on_cond eq
+       restore_irqs_notrace \oldcpsr
+       .endm
+
+#define USER(x...)                             \
+9999:  x;                                      \
+       .pushsection __ex_table,"a";            \
+       .align  3;                              \
+       .long   9999b,9001f;                    \
+       .popsection
+
+#ifdef CONFIG_SMP
+#define ALT_SMP(instr...)                                      \
+9998:  instr
+/*
+ * Note: if you get assembler errors from ALT_UP() when building with
+ * CONFIG_THUMB2_KERNEL, you almost certainly need to use
+ * ALT_SMP( W(instr) ... )
+ */
+#define ALT_UP(instr...)                                       \
+       .pushsection ".alt.smp.init", "a"                       ;\
+       .long   9998b                                           ;\
+9997:  instr                                                   ;\
+       .if . - 9997b != 4                                      ;\
+               .error "ALT_UP() content must assemble to exactly 4 bytes";\
+       .endif                                                  ;\
+       .popsection
+#define ALT_UP_B(label)                                        \
+       .equ    up_b_offset, label - 9998b                      ;\
+       .pushsection ".alt.smp.init", "a"                       ;\
+       .long   9998b                                           ;\
+       W(b)    . + up_b_offset                                 ;\
+       .popsection
+#else
+#define ALT_SMP(instr...)
+#define ALT_UP(instr...) instr
+#define ALT_UP_B(label) b label
+#endif
+
+/*
+ * Instruction barrier
+ */
+       .macro  instr_sync
+#if __LINUX_ARM_ARCH__ >= 7
+       isb
+#elif __LINUX_ARM_ARCH__ == 6
+       mcr     p15, 0, r0, c7, c5, 4
+#endif
+       .endm
+
+/*
+ * SMP data memory barrier
+ */
+       .macro  smp_dmb mode
+#ifdef CONFIG_SMP
+#if __LINUX_ARM_ARCH__ >= 7
+       .ifeqs "\mode","arm"
+       ALT_SMP(dmb)
+       .else
+       ALT_SMP(W(dmb))
+       .endif
+#elif __LINUX_ARM_ARCH__ == 6
+       ALT_SMP(mcr     p15, 0, r0, c7, c10, 5) @ dmb
+#else
+#error Incompatible SMP platform
+#endif
+       .ifeqs "\mode","arm"
+       ALT_UP(nop)
+       .else
+       ALT_UP(W(nop))
+       .endif
+#endif
+       .endm
+
+#ifdef CONFIG_THUMB2_KERNEL
+       .macro  setmode, mode, reg
+       mov     \reg, #\mode
+       msr     cpsr_c, \reg
+       .endm
+#else
+       .macro  setmode, mode, reg
+       msr     cpsr_c, #\mode
+       .endm
+#endif
+
+/*
+ * STRT/LDRT access macros with ARM and Thumb-2 variants
+ */
+#ifdef CONFIG_THUMB2_KERNEL
+
+       .macro  usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T()
+9999:
+       .if     \inc == 1
+       \instr\cond\()b\()\t\().w \reg, [\ptr, #\off]
+       .elseif \inc == 4
+       \instr\cond\()\t\().w \reg, [\ptr, #\off]
+       .else
+       .error  "Unsupported inc macro argument"
+       .endif
+
+       .pushsection __ex_table,"a"
+       .align  3
+       .long   9999b, \abort
+       .popsection
+       .endm
+
+       .macro  usracc, instr, reg, ptr, inc, cond, rept, abort
+       @ explicit IT instruction needed because of the label
+       @ introduced by the USER macro
+       .ifnc   \cond,al
+       .if     \rept == 1
+       itt     \cond
+       .elseif \rept == 2
+       ittt    \cond
+       .else
+       .error  "Unsupported rept macro argument"
+       .endif
+       .endif
+
+       @ Slightly optimised to avoid incrementing the pointer twice
+       usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort
+       .if     \rept == 2
+       usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort
+       .endif
+
+       add\cond \ptr, #\rept * \inc
+       .endm
+
+#else  /* !CONFIG_THUMB2_KERNEL */
+
+       .macro  usracc, instr, reg, ptr, inc, cond, rept, abort, t=T()
+       .rept   \rept
+9999:
+       .if     \inc == 1
+       \instr\cond\()b\()\t \reg, [\ptr], #\inc
+       .elseif \inc == 4
+       \instr\cond\()\t \reg, [\ptr], #\inc
+       .else
+       .error  "Unsupported inc macro argument"
+       .endif
+
+       .pushsection __ex_table,"a"
+       .align  3
+       .long   9999b, \abort
+       .popsection
+       .endr
+       .endm
+
+#endif /* CONFIG_THUMB2_KERNEL */
+
+       .macro  strusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
+       usracc  str, \reg, \ptr, \inc, \cond, \rept, \abort
+       .endm
+
+       .macro  ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
+       usracc  ldr, \reg, \ptr, \inc, \cond, \rept, \abort
+       .endm
+
+/* Utility macro for declaring string literals */
+       .macro  string name:req, string
+       .type \name , #object
+\name:
+       .asciz "\string"
+       .size \name , . - \name
+       .endm
+
+#endif /* __ASM_ASSEMBLER_H__ */
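
The pull/push and get_byte_*/put_byte_* macros above give word-at-a-time copy and string code an endian-independent way to reach individual bytes of a loaded word with a single shift; the __ARMEB__ branch merely reverses the shift amounts. The little-endian case in C (illustrative):

#include <stdint.h>

/* get_byte_n: byte n of a little-endian word is (word >> 8*n) & 0xff,
 * which the macros spell "lsr #0/#8/#16/#24"; big-endian runs the
 * shift amounts in the opposite order. */
static inline uint8_t get_byte(uint32_t word, unsigned int n)
{
    return (uint8_t)(word >> (8 * n));
}

/* put_byte_n: position byte b as byte n of a word before OR-ing it in. */
static inline uint32_t put_byte(uint8_t b, unsigned int n)
{
    return (uint32_t)b << (8 * n);
}
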
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/bitops.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/bitops.h   Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,87 @@
+#include <xen/config.h>
+
+#if __LINUX_ARM_ARCH__ >= 6
+       .macro  bitop, instr
+       ands    ip, r1, #3
+       strneb  r1, [ip]                @ assert word-aligned
+       mov     r2, #1
+       and     r3, r0, #31             @ Get bit offset
+       mov     r0, r0, lsr #5
+       add     r1, r1, r0, lsl #2      @ Get word offset
+       mov     r3, r2, lsl r3
+1:     ldrex   r2, [r1]
+       \instr  r2, r2, r3
+       strex   r0, r2, [r1]
+       cmp     r0, #0
+       bne     1b
+       bx      lr
+       .endm
+
+       .macro  testop, instr, store
+       ands    ip, r1, #3
+       strneb  r1, [ip]                @ assert word-aligned
+       mov     r2, #1
+       and     r3, r0, #31             @ Get bit offset
+       mov     r0, r0, lsr #5
+       add     r1, r1, r0, lsl #2      @ Get word offset
+       mov     r3, r2, lsl r3          @ create mask
+       smp_dmb
+1:     ldrex   r2, [r1]
+       ands    r0, r2, r3              @ save old value of bit
+       \instr  r2, r2, r3              @ toggle bit
+       strex   ip, r2, [r1]
+       cmp     ip, #0
+       bne     1b
+       smp_dmb
+       cmp     r0, #0
+       movne   r0, #1
+2:     bx      lr
+       .endm
+#else
+       .macro  bitop, name, instr
+ENTRY( \name           )
+UNWIND(        .fnstart        )
+       ands    ip, r1, #3
+       strneb  r1, [ip]                @ assert word-aligned
+       and     r2, r0, #31
+       mov     r0, r0, lsr #5
+       mov     r3, #1
+       mov     r3, r3, lsl r2
+       save_and_disable_irqs ip
+       ldr     r2, [r1, r0, lsl #2]
+       \instr  r2, r2, r3
+       str     r2, [r1, r0, lsl #2]
+       restore_irqs ip
+       mov     pc, lr
+UNWIND(        .fnend          )
+ENDPROC(\name          )
+       .endm
+
+/**
+ * testop - implement a test_and_xxx_bit operation.
+ * @instr: operational instruction
+ * @store: store instruction
+ *
+ * Note: we can trivially conditionalise the store instruction
+ * to avoid dirtying the data cache.
+ */
+       .macro  testop, name, instr, store
+ENTRY( \name           )
+UNWIND(        .fnstart        )
+       ands    ip, r1, #3
+       strneb  r1, [ip]                @ assert word-aligned
+       and     r3, r0, #31
+       mov     r0, r0, lsr #5
+       save_and_disable_irqs ip
+       ldr     r2, [r1, r0, lsl #2]!
+       mov     r0, #1
+       tst     r2, r0, lsl r3
+       \instr  r2, r2, r0, lsl r3
+       \store  r2, [r1]
+       moveq   r0, #0
+       restore_irqs ip
+       mov     pc, lr
+UNWIND(        .fnend          )
+ENDPROC(\name          )
+       .endm
+#endif
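
On ARMv6 and later these bit operations are lock-free: ldrex loads the word exclusively, the caller-supplied instruction (eor, bic, orr, ...) modifies it, and strex succeeds only if no other observer touched the location in between, otherwise the loop retries. The opening "ands ip, r1, #3; strneb r1, [ip]" is a deliberate fault on non-word-aligned pointers. A C sketch of the testop contract using GCC atomics (equivalent semantics, not the Xen implementation):

#include <stdbool.h>

/* Atomically set bit nr and return its previous value, as
 * "testop orr str" does; __atomic_fetch_or compiles down to an
 * equivalent ldrex/strex retry loop on ARMv7. */
static inline bool test_and_set_bit_sketch(int nr, volatile unsigned int *words)
{
    volatile unsigned int *w = words + (nr >> 5);  /* "lsr #5": word index */
    unsigned int mask = 1u << (nr & 31);           /* "and r3, r0, #31"    */
    return (__atomic_fetch_or(w, mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

The wrapper files that follow (changebit.S, clearbit.S, setbit.S and the test* variants) simply instantiate bitop/testop with the appropriate instruction: eor, bic or orr.
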
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/changebit.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/changebit.S        Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,18 @@
+/*
+ *  linux/arch/arm/lib/changebit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <xen/config.h>
+
+#include "assembler.h"
+#include "bitops.h"
+                .text
+
+ENTRY(_change_bit)
+       bitop   eor
+ENDPROC(_change_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/clearbit.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/clearbit.S Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,19 @@
+/*
+ *  linux/arch/arm/lib/clearbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <xen/config.h>
+
+#include "assembler.h"
+#include "bitops.h"
+                .text
+
+ENTRY(_clear_bit)
+       bitop   bic
+ENDPROC(_clear_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/copy_template.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/copy_template.S    Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,267 @@
+/*
+ *  linux/arch/arm/lib/copy_template.s
+ *
+ *  Code template for optimized memory copy functions
+ *
+ *  Author:    Nicolas Pitre
+ *  Created:   Sep 28, 2005
+ *  Copyright: MontaVista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+/*
+ * Theory of operation
+ * -------------------
+ *
+ * This file provides the core code for a forward memory copy used in
+ * the implementation of memcopy(), copy_to_user() and copy_from_user().
+ *
+ * The including file must define the following accessor macros
+ * according to the need of the given function:
+ *
+ * ldr1w ptr reg abort
+ *
+ *     This loads one word from 'ptr', stores it in 'reg' and increments
+ *     'ptr' to the next word. The 'abort' argument is used for fixup tables.
+ *
+ * ldr4w ptr reg1 reg2 reg3 reg4 abort
+ * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ *
+ *     This loads four or eight words starting from 'ptr', stores them
+ *     in provided registers and increments 'ptr' past those words.
+ *     The 'abort' argument is used for fixup tables.
+ *
+ * ldr1b ptr reg cond abort
+ *
+ *     Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
+ *     It also must apply the condition code if provided, otherwise the
+ *     "al" condition is assumed by default.
+ *
+ * str1w ptr reg abort
+ * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ * str1b ptr reg cond abort
+ *
+ *     Same as their ldr* counterparts, but data is stored to 'ptr' location
+ *     rather than being loaded.
+ *
+ * enter reg1 reg2
+ *
+ *     Preserve the provided registers on the stack plus any additional
+ *     data as needed by the implementation including this code. Called
+ *     upon code entry.
+ *
+ * exit reg1 reg2
+ *
+ *     Restore registers with the values previously saved with the
+ *     'enter' macro. Called upon code termination.
+ *
+ * LDR1W_SHIFT
+ * STR1W_SHIFT
+ *
+ *     Correction to be applied to the "ip" register when branching into
+ *     the ldr1w or str1w instructions (some of these macros may expand to
+ *     than one 32bit instruction in Thumb-2)
+ */
+
+
+               enter   r4, lr
+
+               subs    r2, r2, #4
+               blt     8f
+               ands    ip, r0, #3
+       PLD(    pld     [r1, #0]                )
+               bne     9f
+               ands    ip, r1, #3
+               bne     10f
+
+1:             subs    r2, r2, #(28)
+               stmfd   sp!, {r5 - r8}
+               blt     5f
+
+       CALGN(  ands    ip, r0, #31             )
+       CALGN(  rsb     r3, ip, #32             )
+       CALGN(  sbcnes  r4, r3, r2              )  @ C is always set here
+       CALGN(  bcs     2f                      )
+       CALGN(  adr     r4, 6f                  )
+       CALGN(  subs    r2, r2, r3              )  @ C gets set
+       CALGN(  add     pc, r4, ip              )
+
+       PLD(    pld     [r1, #0]                )
+2:     PLD(    subs    r2, r2, #96             )
+       PLD(    pld     [r1, #28]               )
+       PLD(    blt     4f                      )
+       PLD(    pld     [r1, #60]               )
+       PLD(    pld     [r1, #92]               )
+
+3:     PLD(    pld     [r1, #124]              )
+4:             ldr8w   r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+               subs    r2, r2, #32
+               str8w   r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+               bge     3b
+       PLD(    cmn     r2, #96                 )
+       PLD(    bge     4b                      )
+
+5:             ands    ip, r2, #28
+               rsb     ip, ip, #32
+#if LDR1W_SHIFT > 0
+               lsl     ip, ip, #LDR1W_SHIFT
+#endif
+               addne   pc, pc, ip              @ C is always clear here
+               b       7f
+6:
+               .rept   (1 << LDR1W_SHIFT)
+               W(nop)
+               .endr
+               ldr1w   r1, r3, abort=20f
+               ldr1w   r1, r4, abort=20f
+               ldr1w   r1, r5, abort=20f
+               ldr1w   r1, r6, abort=20f
+               ldr1w   r1, r7, abort=20f
+               ldr1w   r1, r8, abort=20f
+               ldr1w   r1, lr, abort=20f
+
+#if LDR1W_SHIFT < STR1W_SHIFT
+               lsl     ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
+#elif LDR1W_SHIFT > STR1W_SHIFT
+               lsr     ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
+#endif
+               add     pc, pc, ip
+               nop
+               .rept   (1 << STR1W_SHIFT)
+               W(nop)
+               .endr
+               str1w   r0, r3, abort=20f
+               str1w   r0, r4, abort=20f
+               str1w   r0, r5, abort=20f
+               str1w   r0, r6, abort=20f
+               str1w   r0, r7, abort=20f
+               str1w   r0, r8, abort=20f
+               str1w   r0, lr, abort=20f
+
+       CALGN(  bcs     2b                      )
+
+7:             ldmfd   sp!, {r5 - r8}
+
+8:             movs    r2, r2, lsl #31
+               ldr1b   r1, r3, ne, abort=21f
+               ldr1b   r1, r4, cs, abort=21f
+               ldr1b   r1, ip, cs, abort=21f
+               str1b   r0, r3, ne, abort=21f
+               str1b   r0, r4, cs, abort=21f
+               str1b   r0, ip, cs, abort=21f
+
+               exit    r4, pc
+
+9:             rsb     ip, ip, #4
+               cmp     ip, #2
+               ldr1b   r1, r3, gt, abort=21f
+               ldr1b   r1, r4, ge, abort=21f
+               ldr1b   r1, lr, abort=21f
+               str1b   r0, r3, gt, abort=21f
+               str1b   r0, r4, ge, abort=21f
+               subs    r2, r2, ip
+               str1b   r0, lr, abort=21f
+               blt     8b
+               ands    ip, r1, #3
+               beq     1b
+
+10:            bic     r1, r1, #3
+               cmp     ip, #2
+               ldr1w   r1, lr, abort=21f
+               beq     17f
+               bgt     18f
+
+
+               .macro  forward_copy_shift pull push
+
+               subs    r2, r2, #28
+               blt     14f
+
+       CALGN(  ands    ip, r0, #31             )
+       CALGN(  rsb     ip, ip, #32             )
+       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
+       CALGN(  subcc   r2, r2, ip              )
+       CALGN(  bcc     15f                     )
+
+11:            stmfd   sp!, {r5 - r9}
+
+       PLD(    pld     [r1, #0]                )
+       PLD(    subs    r2, r2, #96             )
+       PLD(    pld     [r1, #28]               )
+       PLD(    blt     13f                     )
+       PLD(    pld     [r1, #60]               )
+       PLD(    pld     [r1, #92]               )
+
+12:    PLD(    pld     [r1, #124]              )
+13:            ldr4w   r1, r4, r5, r6, r7, abort=19f
+               mov     r3, lr, pull #\pull
+               subs    r2, r2, #32
+               ldr4w   r1, r8, r9, ip, lr, abort=19f
+               orr     r3, r3, r4, push #\push
+               mov     r4, r4, pull #\pull
+               orr     r4, r4, r5, push #\push
+               mov     r5, r5, pull #\pull
+               orr     r5, r5, r6, push #\push
+               mov     r6, r6, pull #\pull
+               orr     r6, r6, r7, push #\push
+               mov     r7, r7, pull #\pull
+               orr     r7, r7, r8, push #\push
+               mov     r8, r8, pull #\pull
+               orr     r8, r8, r9, push #\push
+               mov     r9, r9, pull #\pull
+               orr     r9, r9, ip, push #\push
+               mov     ip, ip, pull #\pull
+               orr     ip, ip, lr, push #\push
+               str8w   r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
+               bge     12b
+       PLD(    cmn     r2, #96                 )
+       PLD(    bge     13b                     )
+
+               ldmfd   sp!, {r5 - r9}
+
+14:            ands    ip, r2, #28
+               beq     16f
+
+15:            mov     r3, lr, pull #\pull
+               ldr1w   r1, lr, abort=21f
+               subs    ip, ip, #4
+               orr     r3, r3, lr, push #\push
+               str1w   r0, r3, abort=21f
+               bgt     15b
+       CALGN(  cmp     r2, #0                  )
+       CALGN(  bge     11b                     )
+
+16:            sub     r1, r1, #(\push / 8)
+               b       8b
+
+               .endm
+
+
+               forward_copy_shift      pull=8  push=24
+
+17:            forward_copy_shift      pull=16 push=16
+
+18:            forward_copy_shift      pull=24 push=8
+
+
+/*
+ * Abort preamble and completion macros.
+ * If a fixup handler is required then those macros must surround it.
+ * It is assumed that the fixup code will handle the private part of
+ * the exit macro.
+ */
+
+       .macro  copy_abort_preamble
+19:    ldmfd   sp!, {r5 - r9}
+       b       21f
+20:    ldmfd   sp!, {r5 - r8}
+21:
+       .endm
+
+       .macro  copy_abort_end
+       ldmfd   sp!, {r4, pc}
+       .endm
+
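
copy_template.S is a three-phase forward copy: copy single bytes until the destination is word-aligned (label 9:), stream 32-byte blocks through eight registers with ldr8w/str8w, then mop up the sub-word tail (label 8:). When source and destination are mutually misaligned it cannot load whole words from the source directly, so forward_copy_shift instead recombines adjacent source words in registers with paired pull/push shifts. Stripped of the register scheduling, shift recombination and abort fixups, the structure is (illustrative C):

#include <stddef.h>
#include <stdint.h>

/* Structural sketch only; the real template never falls back to byte
 * copies for a misaligned source, and every access carries a fixup
 * label so copy_to_user/copy_from_user can recover from faults. */
static void *memcpy_sketch(void *dst, const void *src, size_t n)
{
    uint8_t *d = dst;
    const uint8_t *s = src;

    while (n && ((uintptr_t)d & 3)) {        /* head: align destination */
        *d++ = *s++;
        n--;
    }
    if (!((uintptr_t)s & 3)) {               /* both word-aligned now */
        while (n >= 32) {                    /* the ldr8w/str8w blocks */
            const uint32_t *sw = (const uint32_t *)s;
            uint32_t *dw = (uint32_t *)d;
            for (int i = 0; i < 8; i++)
                dw[i] = sw[i];
            d += 32; s += 32; n -= 32;
        }
    }
    while (n--)                              /* tail (or misaligned src) */
        *d++ = *s++;
    return dst;
}
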
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/div64.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/div64.S    Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,211 @@
+/*
+ *  linux/arch/arm/lib/div64.S
+ *
+ *  Optimized computation of 64-bit dividend / 32-bit divisor
+ *
+ *  Author:    Nicolas Pitre
+ *  Created:   Oct 5, 2003
+ *  Copyright: Monta Vista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <xen/config.h>
+#include "assembler.h"
+       
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+/*
+ * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
+ *
+ * Note: Calling convention is totally non standard for optimal code.
+ *       This is meant to be used by do_div() from include/asm/div64.h only.
+ *
+ * Input parameters:
+ *     xh-xl   = dividend (clobbered)
+ *     r4      = divisor (preserved)
+ *
+ * Output values:
+ *     yh-yl   = result
+ *     xh      = remainder
+ *
+ * Clobbered regs: xl, ip
+ */
+
+ENTRY(__do_div64)
+UNWIND(.fnstart)
+
+       @ Test for easy paths first.
+       subs    ip, r4, #1
+       bls     9f                      @ divisor is 0 or 1
+       tst     ip, r4
+       beq     8f                      @ divisor is power of 2
+
+       @ See if we need to handle upper 32-bit result.
+       cmp     xh, r4
+       mov     yh, #0
+       blo     3f
+
+       @ Align divisor with upper part of dividend.
+       @ The aligned divisor is stored in yl preserving the original.
+       @ The bit position is stored in ip.
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+       clz     yl, r4
+       clz     ip, xh
+       sub     yl, yl, ip
+       mov     ip, #1
+       mov     ip, ip, lsl yl
+       mov     yl, r4, lsl yl
+
+#else
+
+       mov     yl, r4
+       mov     ip, #1
+1:     cmp     yl, #0x80000000
+       cmpcc   yl, xh
+       movcc   yl, yl, lsl #1
+       movcc   ip, ip, lsl #1
+       bcc     1b
+
+#endif
+
+       @ The division loop for needed upper bit positions.
+       @ Break out early if dividend reaches 0.
+2:     cmp     xh, yl
+       orrcs   yh, yh, ip
+       subcss  xh, xh, yl
+       movnes  ip, ip, lsr #1
+       mov     yl, yl, lsr #1
+       bne     2b
+
+       @ See if we need to handle lower 32-bit result.
+3:     cmp     xh, #0
+       mov     yl, #0
+       cmpeq   xl, r4
+       movlo   xh, xl
+       movlo   pc, lr
+
+       @ The division loop for lower bit positions.
+       @ Here we shift remainder bits leftwards rather than moving the
+       @ divisor for comparisons, considering the carry-out bit as well.
+       mov     ip, #0x80000000
+4:     movs    xl, xl, lsl #1
+       adcs    xh, xh, xh
+       beq     6f
+       cmpcc   xh, r4
+5:     orrcs   yl, yl, ip
+       subcs   xh, xh, r4
+       movs    ip, ip, lsr #1
+       bne     4b
+       mov     pc, lr
+
+       @ The top part of remainder became zero.  If carry is set
+       @ (the 33rd bit) this is a false positive so resume the loop.
+       @ Otherwise, if lower part is also null then we are done.
+6:     bcs     5b
+       cmp     xl, #0
+       moveq   pc, lr
+
+       @ We still have remainder bits in the low part.  Bring them up.
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+       clz     xh, xl                  @ we know xh is zero here so...
+       add     xh, xh, #1
+       mov     xl, xl, lsl xh
+       mov     ip, ip, lsr xh
+
+#else
+
+7:     movs    xl, xl, lsl #1
+       mov     ip, ip, lsr #1
+       bcc     7b
+
+#endif
+
+       @ Current remainder is now 1.  It is pointless to compare it with
+       @ the divisor at this point since the divisor cannot be smaller than 3 here.
+       @ If possible, branch for another shift in the division loop.
+       @ If no bit position left then we are done.
+       movs    ip, ip, lsr #1
+       mov     xh, #1
+       bne     4b
+       mov     pc, lr
+
+8:    @ Division by a power of 2: determine what the divisor's order is,
+       @ then simply shift values around
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+       clz     ip, r4
+       rsb     ip, ip, #31
+
+#else
+
+       mov     yl, r4
+       cmp     r4, #(1 << 16)
+       mov     ip, #0
+       movhs   yl, yl, lsr #16
+       movhs   ip, #16
+
+       cmp     yl, #(1 << 8)
+       movhs   yl, yl, lsr #8
+       addhs   ip, ip, #8
+
+       cmp     yl, #(1 << 4)
+       movhs   yl, yl, lsr #4
+       addhs   ip, ip, #4
+
+       cmp     yl, #(1 << 2)
+       addhi   ip, ip, #3
+       addls   ip, ip, yl, lsr #1
+
+#endif
+
+       mov     yh, xh, lsr ip
+       mov     yl, xl, lsr ip
+       rsb     ip, ip, #32
+ ARM(  orr     yl, yl, xh, lsl ip      )
+ THUMB(        lsl     xh, xh, ip              )
+ THUMB(        orr     yl, yl, xh              )
+       mov     xh, xl, lsl ip
+       mov     xh, xh, lsr ip
+       mov     pc, lr
+
+       @ eq -> division by 1: obvious enough...
+9:     moveq   yl, xl
+       moveq   yh, xh
+       moveq   xh, #0
+       moveq   pc, lr
+UNWIND(.fnend)
+
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+Ldiv0_64:
+       @ Division by 0:
+       str     lr, [sp, #-8]!
+       bl      __div0
+
+       @ as wrong as it could be...
+       mov     yl, #0
+       mov     yh, #0
+       mov     xh, #0
+       ldr     pc, [sp], #8
+
+UNWIND(.fnend)
+ENDPROC(__do_div64)
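
For readers without include/asm/div64.h to hand, the contract __do_div64
serves can be sketched in plain C.  This is an illustration of the
semantics only (do_div_sketch is an illustrative name, not the real
macro): the 64-bit dividend is divided in place and the 32-bit remainder
is handed back, matching xh and yh:yl on return from the assembly.

    #include <stdint.h>

    /* Sketch of the do_div()-style semantics; not the real macro. */
    static inline uint32_t do_div_sketch(uint64_t *dividend, uint32_t divisor)
    {
        uint32_t remainder = (uint32_t)(*dividend % divisor); /* xh on return */
        *dividend /= divisor;                                 /* yh:yl on return */
        return remainder;
    }
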
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/findbit.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/findbit.S  Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,198 @@
+/*
+ *  linux/arch/arm/lib/findbit.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16th March 2001 - John Ripley <jripley@xxxxxxxxxxxxx>
+ *   Fixed so that "size" is an exclusive not an inclusive quantity.
+ *   All users of these functions expect exclusive sizes, and may
+ *   also call with zero size.
+ * Reworked by rmk.
+ */
+
+#include <xen/config.h>
+
+#include "assembler.h"
+                .text
+
+/*
+ * Purpose  : Find a 'zero' bit
+ * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
+ */
+ENTRY(_find_first_zero_bit_le)
+               teq     r1, #0  
+               beq     3f
+               mov     r2, #0
+1:
+ ARM(          ldrb    r3, [r0, r2, lsr #3]    )
+ THUMB(                lsr     r3, r2, #3              )
+ THUMB(                ldrb    r3, [r0, r3]            )
+               eors    r3, r3, #0xff           @ invert bits
+               bne     .L_found                @ any now set - found zero bit
+               add     r2, r2, #8              @ next bit pointer
+2:             cmp     r2, r1                  @ any more?
+               blo     1b
+3:             mov     r0, r1                  @ no free bits
+               mov     pc, lr
+ENDPROC(_find_first_zero_bit_le)
+
+/*
+ * Purpose  : Find next 'zero' bit
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ENTRY(_find_next_zero_bit_le)
+               teq     r1, #0
+               beq     3b
+               ands    ip, r2, #7
+               beq     1b                      @ If new byte, goto old routine
+ ARM(          ldrb    r3, [r0, r2, lsr #3]    )
+ THUMB(                lsr     r3, r2, #3              )
+ THUMB(                ldrb    r3, [r0, r3]            )
+               eor     r3, r3, #0xff           @ now looking for a 1 bit
+               movs    r3, r3, lsr ip          @ shift off unused bits
+               bne     .L_found
+               orr     r2, r2, #7              @ if zero, then no bits here
+               add     r2, r2, #1              @ align bit pointer
+               b       2b                      @ loop for next bit
+ENDPROC(_find_next_zero_bit_le)
+
+/*
+ * Purpose  : Find a 'one' bit
+ * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit);
+ */
+ENTRY(_find_first_bit_le)
+               teq     r1, #0  
+               beq     3f
+               mov     r2, #0
+1:
+ ARM(          ldrb    r3, [r0, r2, lsr #3]    )
+ THUMB(                lsr     r3, r2, #3              )
+ THUMB(                ldrb    r3, [r0, r3]            )
+               movs    r3, r3
+               bne     .L_found                @ any now set - found one bit
+               add     r2, r2, #8              @ next bit pointer
+2:             cmp     r2, r1                  @ any more?
+               blo     1b
+3:            mov     r0, r1                  @ no set bits
+               mov     pc, lr
+ENDPROC(_find_first_bit_le)
+
+/*
+ * Purpose  : Find next 'one' bit
+ * Prototype: int find_next_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ENTRY(_find_next_bit_le)
+               teq     r1, #0
+               beq     3b
+               ands    ip, r2, #7
+               beq     1b                      @ If new byte, goto old routine
+ ARM(          ldrb    r3, [r0, r2, lsr #3]    )
+ THUMB(                lsr     r3, r2, #3              )
+ THUMB(                ldrb    r3, [r0, r3]            )
+               movs    r3, r3, lsr ip          @ shift off unused bits
+               bne     .L_found
+               orr     r2, r2, #7              @ if zero, then no bits here
+               add     r2, r2, #1              @ align bit pointer
+               b       2b                      @ loop for next bit
+ENDPROC(_find_next_bit_le)
+
+#ifdef __ARMEB__
+
+ENTRY(_find_first_zero_bit_be)
+               teq     r1, #0
+               beq     3f
+               mov     r2, #0
+1:             eor     r3, r2, #0x18           @ big endian byte ordering
+ ARM(          ldrb    r3, [r0, r3, lsr #3]    )
+ THUMB(                lsr     r3, #3                  )
+ THUMB(                ldrb    r3, [r0, r3]            )
+               eors    r3, r3, #0xff           @ invert bits
+               bne     .L_found                @ any now set - found zero bit
+               add     r2, r2, #8              @ next bit pointer
+2:             cmp     r2, r1                  @ any more?
+               blo     1b
+3:             mov     r0, r1                  @ no free bits
+               mov     pc, lr
+ENDPROC(_find_first_zero_bit_be)
+
+ENTRY(_find_next_zero_bit_be)
+               teq     r1, #0
+               beq     3b
+               ands    ip, r2, #7
+               beq     1b                      @ If new byte, goto old routine
+               eor     r3, r2, #0x18           @ big endian byte ordering
+ ARM(          ldrb    r3, [r0, r3, lsr #3]    )
+ THUMB(                lsr     r3, #3                  )
+ THUMB(                ldrb    r3, [r0, r3]            )
+               eor     r3, r3, #0xff           @ now looking for a 1 bit
+               movs    r3, r3, lsr ip          @ shift off unused bits
+               bne     .L_found
+               orr     r2, r2, #7              @ if zero, then no bits here
+               add     r2, r2, #1              @ align bit pointer
+               b       2b                      @ loop for next bit
+ENDPROC(_find_next_zero_bit_be)
+
+ENTRY(_find_first_bit_be)
+               teq     r1, #0
+               beq     3f
+               mov     r2, #0
+1:             eor     r3, r2, #0x18           @ big endian byte ordering
+ ARM(          ldrb    r3, [r0, r3, lsr #3]    )
+ THUMB(                lsr     r3, #3                  )
+ THUMB(                ldrb    r3, [r0, r3]            )
+               movs    r3, r3
+               bne     .L_found                @ any now set - found one bit
+               add     r2, r2, #8              @ next bit pointer
+2:             cmp     r2, r1                  @ any more?
+               blo     1b
+3:            mov     r0, r1                  @ no set bits
+               mov     pc, lr
+ENDPROC(_find_first_bit_be)
+
+ENTRY(_find_next_bit_be)
+               teq     r1, #0
+               beq     3b
+               ands    ip, r2, #7
+               beq     1b                      @ If new byte, goto old routine
+               eor     r3, r2, #0x18           @ big endian byte ordering
+ ARM(          ldrb    r3, [r0, r3, lsr #3]    )
+ THUMB(                lsr     r3, #3                  )
+ THUMB(                ldrb    r3, [r0, r3]            )
+               movs    r3, r3, lsr ip          @ shift off unused bits
+               bne     .L_found
+               orr     r2, r2, #7              @ if zero, then no bits here
+               add     r2, r2, #1              @ align bit pointer
+               b       2b                      @ loop for next bit
+ENDPROC(_find_next_bit_be)
+
+#endif
+
+/*
+ * One or more bits in the LSB of r3 are assumed to be set.
+ */
+.L_found:
+#if __LINUX_ARM_ARCH__ >= 5
+               rsb     r0, r3, #0
+               and     r3, r3, r0
+               clz     r3, r3
+               rsb     r3, r3, #31
+               add     r0, r2, r3
+#else
+               tst     r3, #0x0f
+               addeq   r2, r2, #4
+               movne   r3, r3, lsl #4
+               tst     r3, #0x30
+               addeq   r2, r2, #2
+               movne   r3, r3, lsl #2
+               tst     r3, #0x40
+               addeq   r2, r2, #1
+               mov     r0, r2
+#endif
+               cmp     r1, r0                  @ Clamp to maxbit
+               movlo   r0, r1
+               mov     pc, lr
+
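
A plain-C reference for the little-endian search above may help; this is
a sketch of the semantics, not the hypervisor's code (the name
find_first_zero_bit_ref is illustrative).  Note the clamp to maxbit when
nothing is found, mirroring the "no free bits" epilogue, and that maxbit
is an exclusive quantity as the header comment explains.

    #include <limits.h>

    #define BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))

    /* Reference semantics only; the assembly scans a byte at a time. */
    static unsigned int find_first_zero_bit_ref(const unsigned long *addr,
                                                unsigned int maxbit)
    {
        unsigned int i;

        for ( i = 0; i < maxbit; i++ )
            if ( !(addr[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG))) )
                return i;
        return maxbit;                      /* no zero bit below maxbit */
    }
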
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/lib1funcs.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/lib1funcs.S        Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,389 @@
+/*
+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
+ *
+ * Author: Nicolas Pitre <nico@xxxxxxxxxxx>
+ *   - contributed to gcc-3.4 on Sep 30, 2003
+ *   - adapted for the Linux kernel on Oct 2, 2003
+ */
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+
+#include <xen/config.h>
+#include "assembler.h"
+
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+       clz     \curbit, \divisor
+       clz     \result, \dividend
+       sub     \result, \curbit, \result
+       mov     \curbit, #1
+       mov     \divisor, \divisor, lsl \result
+       mov     \curbit, \curbit, lsl \result
+       mov     \result, #0
+       
+#else
+
+       @ Initially shift the divisor left 3 bits if possible,
+       @ set curbit accordingly.  This allows curbit to be located
+       @ at the left end of each 4-bit nibble in the division loop,
+       @ saving one loop iteration in most cases.
+       tst     \divisor, #0xe0000000
+       moveq   \divisor, \divisor, lsl #3
+       moveq   \curbit, #8
+       movne   \curbit, #1
+
+       @ Unless the divisor is very big, shift it up in multiples of
+       @ four bits, since this is the amount of unwinding in the main
+       @ division loop.  Continue shifting until the divisor is 
+       @ larger than the dividend.
+1:     cmp     \divisor, #0x10000000
+       cmplo   \divisor, \dividend
+       movlo   \divisor, \divisor, lsl #4
+       movlo   \curbit, \curbit, lsl #4
+       blo     1b
+
+       @ For a very big divisor, we must shift it one bit at a time, or
+       @ we will be in danger of overflowing.
+1:     cmp     \divisor, #0x80000000
+       cmplo   \divisor, \dividend
+       movlo   \divisor, \divisor, lsl #1
+       movlo   \curbit, \curbit, lsl #1
+       blo     1b
+
+       mov     \result, #0
+
+#endif
+
+       @ Division loop
+1:     cmp     \dividend, \divisor
+       subhs   \dividend, \dividend, \divisor
+       orrhs   \result,   \result,   \curbit
+       cmp     \dividend, \divisor,  lsr #1
+       subhs   \dividend, \dividend, \divisor, lsr #1
+       orrhs   \result,   \result,   \curbit,  lsr #1
+       cmp     \dividend, \divisor,  lsr #2
+       subhs   \dividend, \dividend, \divisor, lsr #2
+       orrhs   \result,   \result,   \curbit,  lsr #2
+       cmp     \dividend, \divisor,  lsr #3
+       subhs   \dividend, \dividend, \divisor, lsr #3
+       orrhs   \result,   \result,   \curbit,  lsr #3
+       cmp     \dividend, #0                   @ Early termination?
+       movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
+       movne   \divisor,  \divisor, lsr #4
+       bne     1b
+
+.endm
+
+
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+       clz     \order, \divisor
+       rsb     \order, \order, #31
+
+#else
+
+       cmp     \divisor, #(1 << 16)
+       movhs   \divisor, \divisor, lsr #16
+       movhs   \order, #16
+       movlo   \order, #0
+
+       cmp     \divisor, #(1 << 8)
+       movhs   \divisor, \divisor, lsr #8
+       addhs   \order, \order, #8
+
+       cmp     \divisor, #(1 << 4)
+       movhs   \divisor, \divisor, lsr #4
+       addhs   \order, \order, #4
+
+       cmp     \divisor, #(1 << 2)
+       addhi   \order, \order, #3
+       addls   \order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+
+
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+       clz     \order, \divisor
+       clz     \spare, \dividend
+       sub     \order, \order, \spare
+       mov     \divisor, \divisor, lsl \order
+
+#else
+
+       mov     \order, #0
+
+       @ Unless the divisor is very big, shift it up in multiples of
+       @ four bits, since this is the amount of unwinding in the main
+       @ division loop.  Continue shifting until the divisor is 
+       @ larger than the dividend.
+1:     cmp     \divisor, #0x10000000
+       cmplo   \divisor, \dividend
+       movlo   \divisor, \divisor, lsl #4
+       addlo   \order, \order, #4
+       blo     1b
+
+       @ For a very big divisor, we must shift it one bit at a time, or
+       @ we will be in danger of overflowing.
+1:     cmp     \divisor, #0x80000000
+       cmplo   \divisor, \dividend
+       movlo   \divisor, \divisor, lsl #1
+       addlo   \order, \order, #1
+       blo     1b
+
+#endif
+
+       @ Perform all needed subtractions to keep only the remainder.
+       @ Do comparisons in batches of 4 first.
+       subs    \order, \order, #3              @ yes, 3 is intended here
+       blt     2f
+
+1:     cmp     \dividend, \divisor
+       subhs   \dividend, \dividend, \divisor
+       cmp     \dividend, \divisor,  lsr #1
+       subhs   \dividend, \dividend, \divisor, lsr #1
+       cmp     \dividend, \divisor,  lsr #2
+       subhs   \dividend, \dividend, \divisor, lsr #2
+       cmp     \dividend, \divisor,  lsr #3
+       subhs   \dividend, \dividend, \divisor, lsr #3
+       cmp     \dividend, #1
+       mov     \divisor, \divisor, lsr #4
+       subges  \order, \order, #4
+       bge     1b
+
+       tst     \order, #3
+       teqne   \dividend, #0
+       beq     5f
+
+       @ Either 1, 2 or 3 comparisons/subtractions are left.
+2:     cmn     \order, #2
+       blt     4f
+       beq     3f
+       cmp     \dividend, \divisor
+       subhs   \dividend, \dividend, \divisor
+       mov     \divisor,  \divisor,  lsr #1
+3:     cmp     \dividend, \divisor
+       subhs   \dividend, \dividend, \divisor
+       mov     \divisor,  \divisor,  lsr #1
+4:     cmp     \dividend, \divisor
+       subhs   \dividend, \dividend, \divisor
+5:
+.endm
+
+
+ENTRY(__udivsi3)
+ENTRY(__aeabi_uidiv)
+UNWIND(.fnstart)
+
+       subs    r2, r1, #1
+       moveq   pc, lr
+       bcc     Ldiv0
+       cmp     r0, r1
+       bls     11f
+       tst     r1, r2
+       beq     12f
+
+       ARM_DIV_BODY r0, r1, r2, r3
+
+       mov     r0, r2
+       mov     pc, lr
+
+11:    moveq   r0, #1
+       movne   r0, #0
+       mov     pc, lr
+
+12:    ARM_DIV2_ORDER r1, r2
+
+       mov     r0, r0, lsr r2
+       mov     pc, lr
+
+UNWIND(.fnend)
+ENDPROC(__udivsi3)
+ENDPROC(__aeabi_uidiv)
+
+ENTRY(__umodsi3)
+UNWIND(.fnstart)
+
+       subs    r2, r1, #1                      @ compare divisor with 1
+       bcc     Ldiv0
+       cmpne   r0, r1                          @ compare dividend with divisor
+       moveq   r0, #0
+       tsthi   r1, r2                          @ see if divisor is power of 2
+       andeq   r0, r0, r2
+       movls   pc, lr
+
+       ARM_MOD_BODY r0, r1, r2, r3
+
+       mov     pc, lr
+
+UNWIND(.fnend)
+ENDPROC(__umodsi3)
+
+ENTRY(__divsi3)
+ENTRY(__aeabi_idiv)
+UNWIND(.fnstart)
+
+       cmp     r1, #0
+       eor     ip, r0, r1                      @ save the sign of the result.
+       beq     Ldiv0
+       rsbmi   r1, r1, #0                      @ loops below use unsigned.
+       subs    r2, r1, #1                      @ division by 1 or -1 ?
+       beq     10f
+       movs    r3, r0
+       rsbmi   r3, r0, #0                      @ positive dividend value
+       cmp     r3, r1
+       bls     11f
+       tst     r1, r2                          @ divisor is power of 2 ?
+       beq     12f
+
+       ARM_DIV_BODY r3, r1, r0, r2
+
+       cmp     ip, #0
+       rsbmi   r0, r0, #0
+       mov     pc, lr
+
+10:    teq     ip, r0                          @ same sign ?
+       rsbmi   r0, r0, #0
+       mov     pc, lr
+
+11:    movlo   r0, #0
+       moveq   r0, ip, asr #31
+       orreq   r0, r0, #1
+       mov     pc, lr
+
+12:    ARM_DIV2_ORDER r1, r2
+
+       cmp     ip, #0
+       mov     r0, r3, lsr r2
+       rsbmi   r0, r0, #0
+       mov     pc, lr
+
+UNWIND(.fnend)
+ENDPROC(__divsi3)
+ENDPROC(__aeabi_idiv)
+
+ENTRY(__modsi3)
+UNWIND(.fnstart)
+
+       cmp     r1, #0
+       beq     Ldiv0
+       rsbmi   r1, r1, #0                      @ loops below use unsigned.
+       movs    ip, r0                          @ preserve sign of dividend
+       rsbmi   r0, r0, #0                      @ if negative make positive
+       subs    r2, r1, #1                      @ compare divisor with 1
+       cmpne   r0, r1                          @ compare dividend with divisor
+       moveq   r0, #0
+       tsthi   r1, r2                          @ see if divisor is power of 2
+       andeq   r0, r0, r2
+       bls     10f
+
+       ARM_MOD_BODY r0, r1, r2, r3
+
+10:    cmp     ip, #0
+       rsbmi   r0, r0, #0
+       mov     pc, lr
+
+UNWIND(.fnend)
+ENDPROC(__modsi3)
+
+#ifdef CONFIG_AEABI
+
+ENTRY(__aeabi_uidivmod)
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr}  )
+
+       stmfd   sp!, {r0, r1, ip, lr}
+       bl      __aeabi_uidiv
+       ldmfd   sp!, {r1, r2, ip, lr}
+       mul     r3, r0, r2
+       sub     r1, r1, r3
+       mov     pc, lr
+
+UNWIND(.fnend)
+ENDPROC(__aeabi_uidivmod)
+
+ENTRY(__aeabi_idivmod)
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr}  )
+       stmfd   sp!, {r0, r1, ip, lr}
+       bl      __aeabi_idiv
+       ldmfd   sp!, {r1, r2, ip, lr}
+       mul     r3, r0, r2
+       sub     r1, r1, r3
+       mov     pc, lr
+
+UNWIND(.fnend)
+ENDPROC(__aeabi_idivmod)
+
+ENTRY(__aeabi_uldivmod)
+UNWIND(.fnstart)
+UNWIND(.save {lr}      )
+       sub sp, sp, #8
+       stmfd   sp!, {sp, lr}
+       bl __qdivrem
+       ldr lr, [sp, #4]
+       add sp, sp, #8
+       ldmfd sp!, {r2, r3}
+       mov     pc, lr
+
+UNWIND(.fnend)
+ENDPROC(__aeabi_uldivmod)
+
+ENTRY(__aeabi_ldivmod)
+UNWIND(.fnstart)
+UNWIND(.save {lr}      )
+       sub sp, sp, #16
+       stmfd   sp!, {sp, lr}
+       bl __ldivmod_helper
+       ldr lr, [sp, #4]
+       add sp, sp, #16
+       ldmfd   sp!, {r2, r3}
+       mov     pc, lr
+       
+UNWIND(.fnend)
+ENDPROC(__aeabi_ldivmod)
+#endif
+
+Ldiv0:
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+       str     lr, [sp, #-8]!
+       bl      __div0
+       mov     r0, #0                  @ About as wrong as it could be.
+       ldr     pc, [sp], #8
+UNWIND(.fnend)
+ENDPROC(Ldiv0)
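
None of these entry points is called by hand; on an ARMv7 target without
hardware divide (an assumption about the build, not something this patch
states) the compiler lowers ordinary C division and modulo to the EABI
helpers defined above.  A sketch of what reaches them:

    /* Illustrative functions; the comments name the helper the compiler
     * is expected to emit a call to. */
    unsigned int example_udiv(unsigned int n, unsigned int d)
    {
        return n / d;          /* call to __aeabi_uidiv */
    }

    unsigned int example_umod(unsigned int n, unsigned int d)
    {
        return n % d;          /* call to __aeabi_uidivmod */
    }

    long long example_ldiv(long long n, long long d)
    {
        return n / d;          /* call to __aeabi_ldivmod */
    }
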
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/lshrdi3.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/lshrdi3.S  Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,54 @@
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+   Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+
+#include <xen/config.h>
+#include "assembler.h"
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+ENTRY(__lshrdi3)
+ENTRY(__aeabi_llsr)
+
+       subs    r3, r2, #32
+       rsb     ip, r2, #32
+       movmi   al, al, lsr r2
+       movpl   al, ah, lsr r3
+ ARM(  orrmi   al, al, ah, lsl ip      )
+ THUMB(        lslmi   r3, ah, ip              )
+ THUMB(        orrmi   al, al, r3              )
+       mov     ah, ah, lsr r2
+       mov     pc, lr
+
+ENDPROC(__lshrdi3)
+ENDPROC(__aeabi_llsr)
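
As with the division helpers, the compiler reaches __aeabi_llsr on its
own; a 64-bit logical right shift in C is all it takes.  A sketch,
assuming a 32-bit target (the compiler may inline simple cases rather
than call the helper):

    /* Illustrative function only. */
    unsigned long long example_llsr(unsigned long long x, int n)
    {
        return x >> n;         /* may become a call to __aeabi_llsr */
    }
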
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/memcpy.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/memcpy.S   Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,63 @@
+/*
+ *  linux/arch/arm/lib/memcpy.S
+ *
+ *  Author:    Nicolas Pitre
+ *  Created:   Sep 28, 2005
+ *  Copyright: MontaVista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <xen/config.h>
+#include "assembler.h"
+
+#define LDR1W_SHIFT    0
+#define STR1W_SHIFT    0
+
+       .macro ldr1w ptr reg abort
+       W(ldr) \reg, [\ptr], #4
+       .endm
+
+       .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+       ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
+       .endm
+
+       .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+       ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+       .endm
+
+       .macro ldr1b ptr reg cond=al abort
+       ldr\cond\()b \reg, [\ptr], #1
+       .endm
+
+       .macro str1w ptr reg abort
+       W(str) \reg, [\ptr], #4
+       .endm
+
+       .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+       stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+       .endm
+
+       .macro str1b ptr reg cond=al abort
+       str\cond\()b \reg, [\ptr], #1
+       .endm
+
+       .macro enter reg1 reg2
+       stmdb sp!, {r0, \reg1, \reg2}
+       .endm
+
+       .macro exit reg1 reg2
+       ldmfd sp!, {r0, \reg1, \reg2}
+       .endm
+
+       .text
+
+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
+
+ENTRY(memcpy)
+
+#include "copy_template.S"
+
+ENDPROC(memcpy)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/memmove.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/memmove.S  Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,200 @@
+/*
+ *  linux/arch/arm/lib/memmove.S
+ *
+ *  Author:    Nicolas Pitre
+ *  Created:   Sep 28, 2005
+ *  Copyright: (C) MontaVista Software Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <xen/config.h>
+
+#include "assembler.h"
+
+               .text
+
+/*
+ * Prototype: void *memmove(void *dest, const void *src, size_t n);
+ *
+ * Note:
+ *
+ * If the memory regions don't overlap, we simply branch to memcpy which is
+ * normally a bit faster. Otherwise the copy is done going downwards.  This
+ * is a transposition of the code from copy_template.S but with the copy
+ * occurring in the opposite direction.
+ */
+
+ENTRY(memmove)
+
+               subs    ip, r0, r1
+               cmphi   r2, ip
+               bls     memcpy
+
+               stmfd   sp!, {r0, r4, lr}
+               add     r1, r1, r2
+               add     r0, r0, r2
+               subs    r2, r2, #4
+               blt     8f
+               ands    ip, r0, #3
+       PLD(    pld     [r1, #-4]               )
+               bne     9f
+               ands    ip, r1, #3
+               bne     10f
+
+1:             subs    r2, r2, #(28)
+               stmfd   sp!, {r5 - r8}
+               blt     5f
+
+       CALGN(  ands    ip, r0, #31             )
+       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
+       CALGN(  bcs     2f                      )
+       CALGN(  adr     r4, 6f                  )
+       CALGN(  subs    r2, r2, ip              )  @ C is set here
+       CALGN(  rsb     ip, ip, #32             )
+       CALGN(  add     pc, r4, ip              )
+
+       PLD(    pld     [r1, #-4]               )
+2:     PLD(    subs    r2, r2, #96             )
+       PLD(    pld     [r1, #-32]              )
+       PLD(    blt     4f                      )
+       PLD(    pld     [r1, #-64]              )
+       PLD(    pld     [r1, #-96]              )
+
+3:     PLD(    pld     [r1, #-128]             )
+4:             ldmdb   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+               subs    r2, r2, #32
+               stmdb   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+               bge     3b
+       PLD(    cmn     r2, #96                 )
+       PLD(    bge     4b                      )
+
+5:             ands    ip, r2, #28
+               rsb     ip, ip, #32
+               addne   pc, pc, ip              @ C is always clear here
+               b       7f
+6:             W(nop)
+               W(ldr)  r3, [r1, #-4]!
+               W(ldr)  r4, [r1, #-4]!
+               W(ldr)  r5, [r1, #-4]!
+               W(ldr)  r6, [r1, #-4]!
+               W(ldr)  r7, [r1, #-4]!
+               W(ldr)  r8, [r1, #-4]!
+               W(ldr)  lr, [r1, #-4]!
+
+               add     pc, pc, ip
+               nop
+               W(nop)
+               W(str)  r3, [r0, #-4]!
+               W(str)  r4, [r0, #-4]!
+               W(str)  r5, [r0, #-4]!
+               W(str)  r6, [r0, #-4]!
+               W(str)  r7, [r0, #-4]!
+               W(str)  r8, [r0, #-4]!
+               W(str)  lr, [r0, #-4]!
+
+       CALGN(  bcs     2b                      )
+
+7:             ldmfd   sp!, {r5 - r8}
+
+8:             movs    r2, r2, lsl #31
+               ldrneb  r3, [r1, #-1]!
+               ldrcsb  r4, [r1, #-1]!
+               ldrcsb  ip, [r1, #-1]
+               strneb  r3, [r0, #-1]!
+               strcsb  r4, [r0, #-1]!
+               strcsb  ip, [r0, #-1]
+               ldmfd   sp!, {r0, r4, pc}
+
+9:             cmp     ip, #2
+               ldrgtb  r3, [r1, #-1]!
+               ldrgeb  r4, [r1, #-1]!
+               ldrb    lr, [r1, #-1]!
+               strgtb  r3, [r0, #-1]!
+               strgeb  r4, [r0, #-1]!
+               subs    r2, r2, ip
+               strb    lr, [r0, #-1]!
+               blt     8b
+               ands    ip, r1, #3
+               beq     1b
+
+10:            bic     r1, r1, #3
+               cmp     ip, #2
+               ldr     r3, [r1, #0]
+               beq     17f
+               blt     18f
+
+
+               .macro  backward_copy_shift push pull
+
+               subs    r2, r2, #28
+               blt     14f
+
+       CALGN(  ands    ip, r0, #31             )
+       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
+       CALGN(  subcc   r2, r2, ip              )
+       CALGN(  bcc     15f                     )
+
+11:            stmfd   sp!, {r5 - r9}
+
+       PLD(    pld     [r1, #-4]               )
+       PLD(    subs    r2, r2, #96             )
+       PLD(    pld     [r1, #-32]              )
+       PLD(    blt     13f                     )
+       PLD(    pld     [r1, #-64]              )
+       PLD(    pld     [r1, #-96]              )
+
+12:    PLD(    pld     [r1, #-128]             )
+13:            ldmdb   r1!, {r7, r8, r9, ip}
+               mov     lr, r3, push #\push
+               subs    r2, r2, #32
+               ldmdb   r1!, {r3, r4, r5, r6}
+               orr     lr, lr, ip, pull #\pull
+               mov     ip, ip, push #\push
+               orr     ip, ip, r9, pull #\pull
+               mov     r9, r9, push #\push
+               orr     r9, r9, r8, pull #\pull
+               mov     r8, r8, push #\push
+               orr     r8, r8, r7, pull #\pull
+               mov     r7, r7, push #\push
+               orr     r7, r7, r6, pull #\pull
+               mov     r6, r6, push #\push
+               orr     r6, r6, r5, pull #\pull
+               mov     r5, r5, push #\push
+               orr     r5, r5, r4, pull #\pull
+               mov     r4, r4, push #\push
+               orr     r4, r4, r3, pull #\pull
+               stmdb   r0!, {r4 - r9, ip, lr}
+               bge     12b
+       PLD(    cmn     r2, #96                 )
+       PLD(    bge     13b                     )
+
+               ldmfd   sp!, {r5 - r9}
+
+14:            ands    ip, r2, #28
+               beq     16f
+
+15:            mov     lr, r3, push #\push
+               ldr     r3, [r1, #-4]!
+               subs    ip, ip, #4
+               orr     lr, lr, r3, pull #\pull
+               str     lr, [r0, #-4]!
+               bgt     15b
+       CALGN(  cmp     r2, #0                  )
+       CALGN(  bge     11b                     )
+
+16:            add     r1, r1, #(\pull / 8)
+               b       8b
+
+               .endm
+
+
+               backward_copy_shift     push=8  pull=24
+
+17:            backward_copy_shift     push=16 pull=16
+
+18:            backward_copy_shift     push=24 pull=8
+
+ENDPROC(memmove)
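
The overlap test in the first three instructions is compact enough to
deserve a C restatement.  The sketch below (memmove_ref is an
illustrative name, not a real symbol) mirrors "subs ip, r0, r1; cmphi
r2, ip; bls memcpy" by exploiting unsigned wrap-around: if the
destination is below the source, the difference wraps to a huge value
and the forward path is taken.

    #include <stddef.h>
    #include <stdint.h>

    /* Reference semantics only; the real routine is the assembly above. */
    static void *memmove_ref(void *dest, const void *src, size_t n)
    {
        unsigned char *d = dest;
        const unsigned char *s = src;

        if ( (uintptr_t)d - (uintptr_t)s >= n )
        {
            while ( n-- )
                *d++ = *s++;                 /* forward: the memcpy path */
        }
        else
        {
            d += n; s += n;
            while ( n-- )
                *--d = *--s;                 /* backward: overlap-safe */
        }
        return dest;
    }
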
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/memset.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/memset.S   Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,129 @@
+/*
+ *  linux/arch/arm/lib/memset.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+
+#include <xen/config.h>
+
+#include "assembler.h"
+
+       .text
+       .align  5
+       .word   0
+
+1:     subs    r2, r2, #4              @ 1 do we have enough
+       blt     5f                      @ 1 bytes to align with?
+       cmp     r3, #2                  @ 1
+       strltb  r1, [r0], #1            @ 1
+       strleb  r1, [r0], #1            @ 1
+       strb    r1, [r0], #1            @ 1
+       add     r2, r2, r3              @ 1 (r2 = r2 - (4 - r3))
+/*
+ * The pointer is now aligned and the length is adjusted.  Try doing the
+ * memset again.
+ */
+
+ENTRY(memset)
+       ands    r3, r0, #3              @ 1 unaligned?
+       bne     1b                      @ 1
+/*
+ * we know that the pointer in r0 is aligned to a word boundary.
+ */
+       orr     r1, r1, r1, lsl #8
+       orr     r1, r1, r1, lsl #16
+       mov     r3, r1
+       cmp     r2, #16
+       blt     4f
+
+#if ! CALGN(1)+0
+
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+       str     lr, [sp, #-4]!
+       mov     ip, r1
+       mov     lr, r1
+
+2:     subs    r2, r2, #64
+       stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
+       stmgeia r0!, {r1, r3, ip, lr}
+       stmgeia r0!, {r1, r3, ip, lr}
+       stmgeia r0!, {r1, r3, ip, lr}
+       bgt     2b
+       ldmeqfd sp!, {pc}               @ Now <64 bytes to go.
+/*
+ * No need to correct the count; we're only testing bits from now on
+ */
+       tst     r2, #32
+       stmneia r0!, {r1, r3, ip, lr}
+       stmneia r0!, {r1, r3, ip, lr}
+       tst     r2, #16
+       stmneia r0!, {r1, r3, ip, lr}
+       ldr     lr, [sp], #4
+
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+       stmfd   sp!, {r4-r7, lr}
+       mov     r4, r1
+       mov     r5, r1
+       mov     r6, r1
+       mov     r7, r1
+       mov     ip, r1
+       mov     lr, r1
+
+       cmp     r2, #96
+       tstgt   r0, #31
+       ble     3f
+
+       and     ip, r0, #31
+       rsb     ip, ip, #32
+       sub     r2, r2, ip
+       movs    ip, ip, lsl #(32 - 4)
+       stmcsia r0!, {r4, r5, r6, r7}
+       stmmiia r0!, {r4, r5}
+       tst     ip, #(1 << 30)
+       mov     ip, r1
+       strne   r1, [r0], #4
+
+3:     subs    r2, r2, #64
+       stmgeia r0!, {r1, r3-r7, ip, lr}
+       stmgeia r0!, {r1, r3-r7, ip, lr}
+       bgt     3b
+       ldmeqfd sp!, {r4-r7, pc}
+
+       tst     r2, #32
+       stmneia r0!, {r1, r3-r7, ip, lr}
+       tst     r2, #16
+       stmneia r0!, {r4-r7}
+       ldmfd   sp!, {r4-r7, lr}
+
+#endif
+
+4:     tst     r2, #8
+       stmneia r0!, {r1, r3}
+       tst     r2, #4
+       strne   r1, [r0], #4
+/*
+ * When we get here, we've got less than 4 bytes to zero.  We
+ * may have an unaligned pointer as well.
+ */
+5:     tst     r2, #2
+       strneb  r1, [r0], #1
+       strneb  r1, [r0], #1
+       tst     r2, #1
+       strneb  r1, [r0], #1
+       mov     pc, lr
+ENDPROC(memset)
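
The two "orr r1, r1, r1, lsl #..." instructions after the alignment
check build the fill pattern by replicating the byte across a word, so
each stm stores four copies per register.  The same trick in C
(replicate_byte is an illustrative name):

    #include <stdint.h>

    static uint32_t replicate_byte(uint8_t c)
    {
        uint32_t w = c;

        w |= w << 8;    /* 0x000000cc -> 0x0000cccc */
        w |= w << 16;   /* 0x0000cccc -> 0xcccccccc */
        return w;
    }
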
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/memzero.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/memzero.S  Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,127 @@
+/*
+ *  linux/arch/arm/lib/memzero.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <xen/config.h>
+
+#include "assembler.h"
+
+       .text
+       .align  5
+       .word   0
+/*
+ * Align the pointer in r0.  r3 contains the number of bytes that we are
+ * mis-aligned by, and r1 is the number of bytes.  If r1 < 4, then we
+ * don't bother; we use byte stores instead.
+ */
+1:     subs    r1, r1, #4              @ 1 do we have enough
+       blt     5f                      @ 1 bytes to align with?
+       cmp     r3, #2                  @ 1
+       strltb  r2, [r0], #1            @ 1
+       strleb  r2, [r0], #1            @ 1
+       strb    r2, [r0], #1            @ 1
+       add     r1, r1, r3              @ 1 (r1 = r1 - (4 - r3))
+/*
+ * The pointer is now aligned and the length is adjusted.  Try doing the
+ * memzero again.
+ */
+
+ENTRY(__memzero)
+       mov     r2, #0                  @ 1
+       ands    r3, r0, #3              @ 1 unaligned?
+       bne     1b                      @ 1
+/*
+ * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
+ */
+       cmp     r1, #16                 @ 1 we can skip this chunk if we
+       blt     4f                      @ 1 have < 16 bytes
+
+#if ! CALGN(1)+0
+
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+       str     lr, [sp, #-4]!          @ 1
+       mov     ip, r2                  @ 1
+       mov     lr, r2                  @ 1
+
+3:    subs    r1, r1, #64             @ 1 write 64 bytes out per loop
+       stmgeia r0!, {r2, r3, ip, lr}   @ 4
+       stmgeia r0!, {r2, r3, ip, lr}   @ 4
+       stmgeia r0!, {r2, r3, ip, lr}   @ 4
+       stmgeia r0!, {r2, r3, ip, lr}   @ 4
+       bgt     3b                      @ 1
+       ldmeqfd sp!, {pc}               @ 1/2 quick exit
+/*
+ * No need to correct the count; we're only testing bits from now on
+ */
+       tst     r1, #32                 @ 1
+       stmneia r0!, {r2, r3, ip, lr}   @ 4
+       stmneia r0!, {r2, r3, ip, lr}   @ 4
+       tst     r1, #16                 @ 1 16 bytes or more?
+       stmneia r0!, {r2, r3, ip, lr}   @ 4
+       ldr     lr, [sp], #4            @ 1
+
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+       stmfd   sp!, {r4-r7, lr}
+       mov     r4, r2
+       mov     r5, r2
+       mov     r6, r2
+       mov     r7, r2
+       mov     ip, r2
+       mov     lr, r2
+
+       cmp     r1, #96
+       andgts  ip, r0, #31
+       ble     3f
+
+       rsb     ip, ip, #32
+       sub     r1, r1, ip
+       movs    ip, ip, lsl #(32 - 4)
+       stmcsia r0!, {r4, r5, r6, r7}
+       stmmiia r0!, {r4, r5}
+       movs    ip, ip, lsl #2
+       strcs   r2, [r0], #4
+
+3:     subs    r1, r1, #64
+       stmgeia r0!, {r2-r7, ip, lr}
+       stmgeia r0!, {r2-r7, ip, lr}
+       bgt     3b
+       ldmeqfd sp!, {r4-r7, pc}
+
+       tst     r1, #32
+       stmneia r0!, {r2-r7, ip, lr}
+       tst     r1, #16
+       stmneia r0!, {r4-r7}
+       ldmfd   sp!, {r4-r7, lr}
+
+#endif
+
+4:     tst     r1, #8                  @ 1 8 bytes or more?
+       stmneia r0!, {r2, r3}           @ 2
+       tst     r1, #4                  @ 1 4 bytes or more?
+       strne   r2, [r0], #4            @ 1
+/*
+ * When we get here, we've got less than 4 bytes to zero.  We
+ * may have an unaligned pointer as well.
+ */
+5:     tst     r1, #2                  @ 1 2 bytes or more?
+       strneb  r2, [r0], #1            @ 1
+       strneb  r2, [r0], #1            @ 1
+       tst     r1, #1                  @ 1 a byte left over
+       strneb  r2, [r0], #1            @ 1
+       mov     pc, lr                  @ 1
+ENDPROC(__memzero)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/setbit.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/setbit.S   Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,18 @@
+/*
+ *  linux/arch/arm/lib/setbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <xen/config.h>
+
+#include "assembler.h"
+#include "bitops.h"
+       .text
+
+ENTRY(_set_bit)
+       bitop   orr
+ENDPROC(_set_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/testchangebit.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/testchangebit.S    Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,18 @@
+/*
+ *  linux/arch/arm/lib/testchangebit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <xen/config.h>
+
+#include "assembler.h"
+#include "bitops.h"
+                .text
+
+ENTRY(_test_and_change_bit)
+       testop  eor, str
+ENDPROC(_test_and_change_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/testclearbit.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/testclearbit.S     Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,18 @@
+/*
+ *  linux/arch/arm/lib/testclearbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <xen/config.h>
+
+#include "assembler.h"
+#include "bitops.h"
+                .text
+
+ENTRY(_test_and_clear_bit)
+       testop  bicne, strne
+ENDPROC(_test_and_clear_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/lib/testsetbit.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/lib/testsetbit.S       Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,18 @@
+/*
+ *  linux/arch/arm/lib/testsetbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <xen/config.h>
+
+#include "assembler.h"
+#include "bitops.h"
+                .text
+
+ENTRY(_test_and_set_bit)
+       testop  orreq, streq
+ENDPROC(_test_and_set_bit)
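
For all four bitop files above, the bitop/testop macros from bitops.h
supply the loop body; the net effect of the test-and-op variants,
stated as a deliberately non-atomic C sketch, is "return the old bit,
then apply the operation".  The real macro is expected to be an
exclusive-load/store (ldrex/strex) loop; test_and_set_bit_ref is an
illustrative name.

    /* Non-atomic reference semantics; assumes 32-bit unsigned long. */
    static int test_and_set_bit_ref(int nr, volatile unsigned long *addr)
    {
        unsigned long mask = 1UL << (nr & 31);
        volatile unsigned long *word = addr + (nr >> 5);
        unsigned long old = *word;

        *word = old | mask;
        return (old & mask) != 0;
    }
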
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/mode_switch.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/mode_switch.S  Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,121 @@
+/*
+ * xen/arch/arm/mode_switch.S
+ *
+ * Start-of-day code to take a CPU from Secure mode to Hyp mode.
+ *
+ * Tim Deegan <tim@xxxxxxx>
+ * Copyright (c) 2011-2012 Citrix Systems.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <asm/config.h>
+#include <asm/page.h>
+#include <asm/platform_vexpress.h>
+#include <asm/asm_defns.h>
+#include <asm/gic.h>
+
+
+/* XXX: Versatile Express specific code */
+/* wake up secondary cpus */
+.globl kick_cpus
+kick_cpus:
+        /* write start paddr to v2m sysreg FLAGSSET register */
+        ldr   r0, =(V2M_SYS_MMIO_BASE)        /* base V2M sysreg MMIO address */
+        dsb
+        mov   r2, #0xffffffff
+        str   r2, [r0, #(V2M_SYS_FLAGSCLR)]
+        dsb
+        ldr   r2, =start
+        add   r2, r2, r10
+        str   r2, [r0, #(V2M_SYS_FLAGSSET)]
+        dsb
+        /* send an interrupt */
+        ldr   r0, =(GIC_BASE_ADDRESS + GIC_DR_OFFSET) /* base GICD MMIO address */
+        mov   r2, #0x1
+        str   r2, [r0, #(GICD_CTLR * 4)]      /* enable distributor */
+        mov   r2, #0xfe0000
+        str   r2, [r0, #(GICD_SGIR * 4)]      /* send IPI to everybody */
+        dsb
+        str   r2, [r0, #(GICD_CTLR * 4)]      /* disable distributor */
+        mov   pc, lr
+
+
+/* Bring a CPU up into Hyp mode.  Clobbers r0-r3.
+ *
+ * Expects r12 == CPU number
+ *
+ * This code is specific to the VE model, and not intended to be used
+ * on production systems.  As such it's a bit hackier than the main
+ * boot code in head.S.  In future it will be replaced by better
+ * integration with the bootloader/firmware so that Xen always starts
+ * in Hyp mode. */
+
+.globl enter_hyp_mode
+enter_hyp_mode:
+        mov   r3, lr                 /* Put return address in non-banked reg */
+        cpsid aif, #0x16             /* Enter Monitor mode */
+        mrc   CP32(r0, SCR)
+        orr   r0, r0, #0x100         /* Set HCE */
+        orr   r0, r0, #0xb1          /* Set SCD, AW, FW and NS */
+        bic   r0, r0, #0xe           /* Clear EA, FIQ and IRQ */
+        mcr   CP32(r0, SCR)
+        /* Ugly: the system timer's frequency register is only
+         * programmable in Secure state.  Since we don't know where its
+         * memory-mapped control registers live, we can't find out the
+         * right frequency.  Use the VE model's default frequency here. */
+        ldr   r0, =0x5f5e100         /* 100 MHz */
+        mcr   CP32(r0, CNTFRQ)
+        ldr   r0, =0x40c00           /* SMP, c11, c10 in non-secure mode */
+        mcr   CP32(r0, NSACR)
+        mov   r0, #GIC_BASE_ADDRESS
+        add   r0, r0, #GIC_DR_OFFSET
+        /* Disable the GIC distributor, on the boot CPU only */
+        mov   r1, #0
+        teq   r12, #0                /* Is this the boot CPU? */
+        streq r1, [r0]
+        /* Continuing ugliness: Set up the GIC so that NS state owns
+         * interrupts.  The first 32 interrupts (SGIs & PPIs) must be
+         * configured on all CPUs, while the remainder are SPIs and only
+         * need to be done once, on the boot CPU. */
+        add   r0, r0, #0x80          /* GICD_IGROUP0 */
+        mov   r2, #0xffffffff        /* All interrupts to group 1 */
+        teq   r12, #0                /* Boot CPU? */
+        str   r2, [r0]               /* Interrupts  0-31 (SGI & PPI) */
+        streq r2, [r0, #4]           /* Interrupts 32-63 (SPI) */
+        streq r2, [r0, #8]           /* Interrupts 64-95 (SPI) */
+        /* Disable the GIC CPU interface on all processors */
+        mov   r0, #GIC_BASE_ADDRESS
+        add   r0, r0, #GIC_CR_OFFSET
+        mov   r1, #0
+        str   r1, [r0]
+        /* Must drop priority mask below 0x80 before entering NS state */
+        ldr   r1, =0xff
+        str   r1, [r0, #0x4]         /* -> GICC_PMR */
+        /* Reset a few config registers */
+        mov   r0, #0
+        mcr   CP32(r0, FCSEIDR)
+        mcr   CP32(r0, CONTEXTIDR)
+        /* Allow non-secure access to coprocessors, FIQs, VFP and NEON */
+        ldr   r1, =0x3fff            /* 14 CP bits set, all others clear */
+        mcr   CP32(r1, NSACR)
+
+        mrs   r0, cpsr               /* Copy the CPSR */
+        add   r0, r0, #0x4           /* 0x16 (Monitor) -> 0x1a (Hyp) */
+        msr   spsr_cxsf, r0          /* into the SPSR */
+        movs  pc, r3                 /* Exception-return into Hyp mode */
+
+/*
+ * Local variables:
+ * mode: ASM
+ * indent-tabs-mode: nil
+ * End:
+ */
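
The "add r0, r0, #0x4" step in enter_hyp_mode relies on the Hyp mode
encoding being exactly the Monitor encoding plus 4 in CPSR[4:0].  A
standalone sketch of that arithmetic (PSR_MODE_MON is an illustrative
name; PSR_MODE_MASK and PSR_MODE_HYP match the constants used by the
entry code):

    #include <assert.h>
    #include <stdint.h>

    #define PSR_MODE_MASK 0x1fu
    #define PSR_MODE_MON  0x16u
    #define PSR_MODE_HYP  0x1au

    int main(void)
    {
        uint32_t cpsr = 0x1d6;      /* example value: Monitor mode, IRQ/FIQ masked */

        assert( (cpsr & PSR_MODE_MASK) == PSR_MODE_MON );
        cpsr += 4;                  /* the "add r0, r0, #0x4" step */
        assert( (cpsr & PSR_MODE_MASK) == PSR_MODE_HYP );
        return 0;
    }
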
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/arm32/proc-ca15.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/arm/arm32/proc-ca15.S    Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,35 @@
+/*
+ * xen/arch/arm/proc-ca15.S
+ *
+ * Cortex A15 specific initializations
+ *
+ * Copyright (c) 2011 Citrix Systems.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <asm/asm_defns.h>
+#include <asm/processor-ca15.h>
+
+.globl cortex_a15_init
+cortex_a15_init:
+        /* Set up the SMP bit in ACTLR */
+        mrc   CP32(r0, ACTLR)
+        orr   r0, r0, #(ACTLR_CA15_SMP) /* enable SMP bit */
+        mcr   CP32(r0, ACTLR)
+        mov   pc, lr
+
+/*
+ * Local variables:
+ * mode: ASM
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/asm-offsets.c
--- a/xen/arch/arm/asm-offsets.c        Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- */
-#define COMPILE_OFFSETS
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/sched.h>
-#include <public/xen.h>
-#include <asm/current.h>
-
-#define DEFINE(_sym, _val) \
-    __asm__ __volatile__ ( "\n->" #_sym " %0 " #_val : : "i" (_val) )
-#define BLANK() \
-    __asm__ __volatile__ ( "\n->" : : )
-#define OFFSET(_sym, _str, _mem) \
-    DEFINE(_sym, offsetof(_str, _mem));
-
-/* base-2 logarithm */
-#define __L2(_x)  (((_x) & 0x00000002) ?   1 : 0)
-#define __L4(_x)  (((_x) & 0x0000000c) ? ( 2 + __L2( (_x)>> 2)) : __L2( _x))
-#define __L8(_x)  (((_x) & 0x000000f0) ? ( 4 + __L4( (_x)>> 4)) : __L4( _x))
-#define __L16(_x) (((_x) & 0x0000ff00) ? ( 8 + __L8( (_x)>> 8)) : __L8( _x))
-#define LOG_2(_x) (((_x) & 0xffff0000) ? (16 + __L16((_x)>>16)) : __L16(_x))
-
-void __dummy__(void)
-{
-   OFFSET(UREGS_sp, struct cpu_user_regs, sp);
-   OFFSET(UREGS_lr, struct cpu_user_regs, lr);
-   OFFSET(UREGS_pc, struct cpu_user_regs, pc);
-   OFFSET(UREGS_cpsr, struct cpu_user_regs, cpsr);
-
-   OFFSET(UREGS_LR_usr, struct cpu_user_regs, lr_usr);
-   OFFSET(UREGS_SP_usr, struct cpu_user_regs, sp_usr);
-
-   OFFSET(UREGS_SP_svc, struct cpu_user_regs, sp_svc);
-   OFFSET(UREGS_LR_svc, struct cpu_user_regs, lr_svc);
-   OFFSET(UREGS_SPSR_svc, struct cpu_user_regs, spsr_svc);
-
-   OFFSET(UREGS_SP_abt, struct cpu_user_regs, sp_abt);
-   OFFSET(UREGS_LR_abt, struct cpu_user_regs, lr_abt);
-   OFFSET(UREGS_SPSR_abt, struct cpu_user_regs, spsr_abt);
-
-   OFFSET(UREGS_SP_und, struct cpu_user_regs, sp_und);
-   OFFSET(UREGS_LR_und, struct cpu_user_regs, lr_und);
-   OFFSET(UREGS_SPSR_und, struct cpu_user_regs, spsr_und);
-
-   OFFSET(UREGS_SP_irq, struct cpu_user_regs, sp_irq);
-   OFFSET(UREGS_LR_irq, struct cpu_user_regs, lr_irq);
-   OFFSET(UREGS_SPSR_irq, struct cpu_user_regs, spsr_irq);
-
-   OFFSET(UREGS_SP_fiq, struct cpu_user_regs, sp_fiq);
-   OFFSET(UREGS_LR_fiq, struct cpu_user_regs, lr_fiq);
-   OFFSET(UREGS_SPSR_fiq, struct cpu_user_regs, spsr_fiq);
-
-   OFFSET(UREGS_R8_fiq, struct cpu_user_regs, r8_fiq);
-   OFFSET(UREGS_R9_fiq, struct cpu_user_regs, r9_fiq);
-   OFFSET(UREGS_R10_fiq, struct cpu_user_regs, r10_fiq);
-   OFFSET(UREGS_R11_fiq, struct cpu_user_regs, r11_fiq);
-   OFFSET(UREGS_R12_fiq, struct cpu_user_regs, r12_fiq);
-
-   OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, cpsr);
-   DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs));
-   BLANK();
-
-   DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
-
-   OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/domain.c
--- a/xen/arch/arm/domain.c     Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/domain.c     Wed Dec 19 14:16:30 2012 +0000
@@ -12,7 +12,7 @@
 #include <asm/p2m.h>
 #include <asm/irq.h>
 
-#include "gic.h"
+#include <asm/gic.h>
 #include "vtimer.h"
 #include "vpl011.h"
 
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/domain_build.c
--- a/xen/arch/arm/domain_build.c       Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/domain_build.c       Wed Dec 19 14:16:30 2012 +0000
@@ -11,7 +11,7 @@
 #include <xen/libfdt/libfdt.h>
 #include <xen/guest_access.h>
 
-#include "gic.h"
+#include <asm/gic.h>
 #include "kernel.h"
 
 static unsigned int __initdata opt_dom0_max_vcpus;
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/entry.S
--- a/xen/arch/arm/entry.S      Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,141 +0,0 @@
-#include <xen/config.h>
-#include <asm/asm_defns.h>
-#include <public/xen.h>
-
-#define SAVE_ONE_BANKED(reg)    mrs r11, reg; str r11, [sp, #UREGS_##reg]
-#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11
-
-#define SAVE_BANKED(mode) \
-        SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode)
-
-#define RESTORE_BANKED(mode) \
-        RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode)
-
-#define SAVE_ALL                                                        \
-        sub sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */      \
-        push {r0-r12}; /* Save R0-R12 */                                \
-                                                                        \
-        mrs r11, ELR_hyp;               /* ELR_hyp is return address. */\
-        str r11, [sp, #UREGS_pc];                                       \
-                                                                        \
-        str lr, [sp, #UREGS_lr];                                        \
-                                                                        \
-        add r11, sp, #UREGS_kernel_sizeof+4;                            \
-        str r11, [sp, #UREGS_sp];                                       \
-                                                                        \
-        mrs r11, SPSR_hyp;                                              \
-        str r11, [sp, #UREGS_cpsr];                                     \
-        and r11, #PSR_MODE_MASK;                                        \
-        cmp r11, #PSR_MODE_HYP;                                         \
-        blne save_guest_regs
-
-save_guest_regs:
-        ldr r11, =0xffffffff  /* Clobber SP which is only valid for hypervisor frames. */
-        str r11, [sp, #UREGS_sp]
-        SAVE_ONE_BANKED(SP_usr)
-        /* LR_usr is the same physical register as lr and is saved in SAVE_ALL */
-        SAVE_BANKED(svc)
-        SAVE_BANKED(abt)
-        SAVE_BANKED(und)
-        SAVE_BANKED(irq)
-        SAVE_BANKED(fiq)
-        SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq)
-        SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq);
-        mov pc, lr
-
-#define DEFINE_TRAP_ENTRY(trap)                                         \
-        ALIGN;                                                          \
-trap_##trap:                                                            \
-        SAVE_ALL;                                                       \
-        cpsie i;        /* local_irq_enable */                          \
-        adr lr, return_from_trap;                                       \
-        mov r0, sp;                                                     \
-        mov r11, sp;                                                    \
-        bic sp, #7; /* Align the stack pointer (noop on guest trap) */  \
-        b do_trap_##trap
-
-#define DEFINE_TRAP_ENTRY_NOIRQ(trap)                                   \
-        ALIGN;                                                          \
-trap_##trap:                                                            \
-        SAVE_ALL;                                                       \
-        adr lr, return_from_trap;                                       \
-        mov r0, sp;                                                     \
-        mov r11, sp;                                                    \
-        bic sp, #7; /* Align the stack pointer (noop on guest trap) */  \
-        b do_trap_##trap
-
-.globl hyp_traps_vector
-        .align 5
-hyp_traps_vector:
-        .word 0                         /* 0x00 - Reset */
-        b trap_undefined_instruction    /* 0x04 - Undefined Instruction */
-        b trap_supervisor_call          /* 0x08 - Supervisor Call */
-        b trap_prefetch_abort           /* 0x0c - Prefetch Abort */
-        b trap_data_abort               /* 0x10 - Data Abort */
-        b trap_hypervisor               /* 0x14 - Hypervisor */
-        b trap_irq                      /* 0x18 - IRQ */
-        b trap_fiq                      /* 0x1c - FIQ */
-
-DEFINE_TRAP_ENTRY(undefined_instruction)
-DEFINE_TRAP_ENTRY(supervisor_call)
-DEFINE_TRAP_ENTRY(prefetch_abort)
-DEFINE_TRAP_ENTRY(data_abort)
-DEFINE_TRAP_ENTRY(hypervisor)
-DEFINE_TRAP_ENTRY_NOIRQ(irq)
-DEFINE_TRAP_ENTRY_NOIRQ(fiq)
-
-return_from_trap:
-        mov sp, r11
-ENTRY(return_to_new_vcpu)
-        ldr r11, [sp, #UREGS_cpsr]
-        and r11, #PSR_MODE_MASK
-        cmp r11, #PSR_MODE_HYP
-        beq return_to_hypervisor
-        /* Fall thru */
-ENTRY(return_to_guest)
-        mov r11, sp
-        bic sp, #7 /* Align the stack pointer */
-        bl leave_hypervisor_tail /* Disables interrupts on return */
-        mov sp, r11
-        RESTORE_ONE_BANKED(SP_usr)
-        /* LR_usr is the same physical register as lr and is restored below */
-        RESTORE_BANKED(svc)
-        RESTORE_BANKED(abt)
-        RESTORE_BANKED(und)
-        RESTORE_BANKED(irq)
-        RESTORE_BANKED(fiq)
-        RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq)
-        RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
-        /* Fall thru */
-ENTRY(return_to_hypervisor)
-        cpsid i
-        ldr lr, [sp, #UREGS_lr]
-        ldr r11, [sp, #UREGS_pc]
-        msr ELR_hyp, r11
-        ldr r11, [sp, #UREGS_cpsr]
-        msr SPSR_hyp, r11
-        pop {r0-r12}
-        add sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */
-        eret
-
-/*
- * struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next)
- *
- * r0 - prev
- * r1 - next
- *
- * Returns prev in r0
- */
-ENTRY(__context_switch)
-        add     ip, r0, #VCPU_arch_saved_context
-        stmia   ip!, {r4 - sl, fp, sp, lr}      /* Save register state */
-
-        add     r4, r1, #VCPU_arch_saved_context
-        ldmia   r4, {r4 - sl, fp, sp, pc}       /* Load registers and return */
-
-/*
- * Local variables:
- * mode: ASM
- * indent-tabs-mode: nil
- * End:
- */
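
For reference: each DEFINE_TRAP_ENTRY above branches to its C handler with r0
pointing at the register frame that SAVE_ALL built on the hypervisor stack, so
the C handlers take a struct cpu_user_regs *; the NOIRQ variants additionally
leave interrupts masked. A minimal sketch of the matching C side for the IRQ
trap, reusing the gic_interrupt() declaration from the gic.h hunk below (the
handler body and include are illustrative, not part of this patch):

    #include <asm/gic.h>   /* gic_interrupt(); header path per this patch */

    /* Sketch only: reached via DEFINE_TRAP_ENTRY_NOIRQ(irq), with r0
     * (the saved frame pointer) becoming the regs argument. */
    void do_trap_irq(struct cpu_user_regs *regs)
    {
        gic_interrupt(regs, 0 /* is_fiq */);
    }
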
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/gic.c
--- a/xen/arch/arm/gic.c        Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/gic.c        Wed Dec 19 14:16:30 2012 +0000
@@ -30,7 +30,7 @@
 #include <asm/p2m.h>
 #include <asm/domain.h>
 
-#include "gic.h"
+#include <asm/gic.h>
 
 /* Access to the GIC Distributor registers through the fixmap */
 #define GICD ((volatile uint32_t *) FIXMAP_ADDR(FIXMAP_GICD))
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/gic.h
--- a/xen/arch/arm/gic.h        Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,172 +0,0 @@
-/*
- * xen/arch/arm/gic.h
- *
- * ARM Generic Interrupt Controller support
- *
- * Tim Deegan <tim@xxxxxxx>
- * Copyright (c) 2011 Citrix Systems.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __ARCH_ARM_GIC_H__
-#define __ARCH_ARM_GIC_H__
-
-#define GICD_CTLR       (0x000/4)
-#define GICD_TYPER      (0x004/4)
-#define GICD_IIDR       (0x008/4)
-#define GICD_IGROUPR    (0x080/4)
-#define GICD_IGROUPRN   (0x0FC/4)
-#define GICD_ISENABLER  (0x100/4)
-#define GICD_ISENABLERN (0x17C/4)
-#define GICD_ICENABLER  (0x180/4)
-#define GICD_ICENABLERN (0x1fC/4)
-#define GICD_ISPENDR    (0x200/4)
-#define GICD_ISPENDRN   (0x27C/4)
-#define GICD_ICPENDR    (0x280/4)
-#define GICD_ICPENDRN   (0x2FC/4)
-#define GICD_ISACTIVER  (0x300/4)
-#define GICD_ISACTIVERN (0x37C/4)
-#define GICD_ICACTIVER  (0x380/4)
-#define GICD_ICACTIVERN (0x3FC/4)
-#define GICD_IPRIORITYR (0x400/4)
-#define GICD_IPRIORITYRN (0x7F8/4)
-#define GICD_ITARGETSR  (0x800/4)
-#define GICD_ITARGETSRN (0xBF8/4)
-#define GICD_ICFGR      (0xC00/4)
-#define GICD_ICFGRN     (0xCFC/4)
-#define GICD_NSACR      (0xE00/4)
-#define GICD_NSACRN     (0xEFC/4)
-#define GICD_ICPIDR2    (0xFE8/4)
-#define GICD_SGIR       (0xF00/4)
-#define GICD_CPENDSGIR  (0xF10/4)
-#define GICD_CPENDSGIRN (0xF1C/4)
-#define GICD_SPENDSGIR  (0xF20/4)
-#define GICD_SPENDSGIRN (0xF2C/4)
-#define GICD_ICPIDR2    (0xFE8/4)
-
-#define GICC_CTLR       (0x0000/4)
-#define GICC_PMR        (0x0004/4)
-#define GICC_BPR        (0x0008/4)
-#define GICC_IAR        (0x000C/4)
-#define GICC_EOIR       (0x0010/4)
-#define GICC_RPR        (0x0014/4)
-#define GICC_HPPIR      (0x0018/4)
-#define GICC_APR        (0x00D0/4)
-#define GICC_NSAPR      (0x00E0/4)
-#define GICC_DIR        (0x1000/4)
-
-#define GICH_HCR        (0x00/4)
-#define GICH_VTR        (0x04/4)
-#define GICH_VMCR       (0x08/4)
-#define GICH_MISR       (0x10/4)
-#define GICH_EISR0      (0x20/4)
-#define GICH_EISR1      (0x24/4)
-#define GICH_ELSR0      (0x30/4)
-#define GICH_ELSR1      (0x34/4)
-#define GICH_APR        (0xF0/4)
-#define GICH_LR         (0x100/4)
-
-/* Register bits */
-#define GICD_CTL_ENABLE 0x1
-
-#define GICD_TYPE_LINES 0x01f
-#define GICD_TYPE_CPUS  0x0e0
-#define GICD_TYPE_SEC   0x400
-
-#define GICC_CTL_ENABLE 0x1
-#define GICC_CTL_EOI    (0x1 << 9)
-
-#define GICC_IA_IRQ     0x03ff
-#define GICC_IA_CPU     0x1c00
-
-#define GICH_HCR_EN       (1 << 0)
-#define GICH_HCR_UIE      (1 << 1)
-#define GICH_HCR_LRENPIE  (1 << 2)
-#define GICH_HCR_NPIE     (1 << 3)
-#define GICH_HCR_VGRP0EIE (1 << 4)
-#define GICH_HCR_VGRP0DIE (1 << 5)
-#define GICH_HCR_VGRP1EIE (1 << 6)
-#define GICH_HCR_VGRP1DIE (1 << 7)
-
-#define GICH_MISR_EOI     (1 << 0)
-#define GICH_MISR_U       (1 << 1)
-#define GICH_MISR_LRENP   (1 << 2)
-#define GICH_MISR_NP      (1 << 3)
-#define GICH_MISR_VGRP0E  (1 << 4)
-#define GICH_MISR_VGRP0D  (1 << 5)
-#define GICH_MISR_VGRP1E  (1 << 6)
-#define GICH_MISR_VGRP1D  (1 << 7)
-
-#define GICH_LR_VIRTUAL_MASK    0x3ff
-#define GICH_LR_VIRTUAL_SHIFT   0
-#define GICH_LR_PHYSICAL_MASK   0x3ff
-#define GICH_LR_PHYSICAL_SHIFT  10
-#define GICH_LR_STATE_MASK      0x3
-#define GICH_LR_STATE_SHIFT     28
-#define GICH_LR_PRIORITY_SHIFT  23
-#define GICH_LR_MAINTENANCE_IRQ (1<<19)
-#define GICH_LR_PENDING         (1<<28)
-#define GICH_LR_ACTIVE          (1<<29)
-#define GICH_LR_GRP1            (1<<30)
-#define GICH_LR_HW              (1<<31)
-#define GICH_LR_CPUID_SHIFT     9
-#define GICH_VTR_NRLRGS         0x3f
-
-/* XXX: write this into the DT */
-#define VGIC_IRQ_EVTCHN_CALLBACK 31
-
-#ifndef __ASSEMBLY__
-extern int domain_vgic_init(struct domain *d);
-extern void domain_vgic_free(struct domain *d);
-
-extern int vcpu_vgic_init(struct vcpu *v);
-
-extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq,int virtual);
-extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
-
-extern void gic_route_ppis(void);
-extern void gic_route_spis(void);
-
-extern void gic_inject(void);
-
-extern void __cpuinit init_maintenance_interrupt(void);
-extern void gic_set_guest_irq(struct vcpu *v, unsigned int irq,
-        unsigned int state, unsigned int priority);
-extern int gic_route_irq_to_guest(struct domain *d, unsigned int irq,
-                                  const char * devname);
-
-/* Accept an interrupt from the GIC and dispatch its handler */
-extern void gic_interrupt(struct cpu_user_regs *regs, int is_fiq);
-/* Bring up the interrupt controller, and report # cpus attached */
-extern void gic_init(void);
-/* Bring up a secondary CPU's per-CPU GIC interface */
-extern void gic_init_secondary_cpu(void);
-/* Take down a CPU's per-CPU GIC interface */
-extern void gic_disable_cpu(void);
-/* setup the gic virtual interface for a guest */
-extern int gicv_setup(struct domain *d);
-
-/* Context switch */
-extern void gic_save_state(struct vcpu *v);
-extern void gic_restore_state(struct vcpu *v);
-
-#endif /* __ASSEMBLY__ */
-#endif
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- */
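
A note on the GICD_*/GICC_*/GICH_* values above: they are word indices (byte
offset divided by 4), matching the volatile uint32_t * fixmap mappings such as
the GICD macro visible in the gic.c hunk earlier. A minimal sketch of how an
enable-register write then looks, assuming that mapping (gicd_enable_irq() is
a made-up helper name, not from this patch):

    /* Sketch: each set-enable register covers 32 interrupt lines,
     * so line `irq` lives at word irq/32, bit irq%32. */
    static inline void gicd_enable_irq(unsigned int irq)
    {
        GICD[GICD_ISENABLER + irq / 32] = 1u << (irq % 32);
    }
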
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/head.S
--- a/xen/arch/arm/head.S       Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,415 +0,0 @@
-/*
- * xen/arch/arm/head.S
- *
- * Start-of-day code for an ARMv7-A with virt extensions.
- *
- * Tim Deegan <tim@xxxxxxx>
- * Copyright (c) 2011 Citrix Systems.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <asm/config.h>
-#include <asm/page.h>
-#include <asm/processor-ca15.h>
-#include <asm/asm_defns.h>
-
-#define ZIMAGE_MAGIC_NUMBER 0x016f2818
-
-#define PT_PT  0xe7f /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=1, P=1 */
-#define PT_MEM 0xe7d /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=0, P=1 */
-#define PT_DEV 0xe71 /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=0, P=1 */
-#define PT_DEV_L3 0xe73 /* lev3: nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=1, P=1 */
-
-#define PT_UPPER(x) (PT_##x & 0xf00)
-#define PT_LOWER(x) (PT_##x & 0x0ff)
-
-/* Macro to print a string to the UART, if there is one.
- * Clobbers r0-r3. */
-#ifdef EARLY_UART_ADDRESS
-#define PRINT(_s)       \
-        adr   r0, 98f ; \
-        bl    puts    ; \
-        b     99f     ; \
-98:     .asciz _s     ; \
-        .align 2      ; \
-99:
-#else
-#define PRINT(s)
-#endif
-
-        .arm
-
-        /* This must be the very first address in the loaded image.
-         * It should be linked at XEN_VIRT_START, and loaded at any
-         * 2MB-aligned address.  All of text+data+bss must fit in 2MB,
-         * or the initial pagetable code below will need adjustment. */
-        .global start
-start:
-
-        /* zImage magic header, see:
-         * http://www.simtec.co.uk/products/SWLINUX/files/booting_article.html#d0e309
-         */
-        .rept 8
-        mov   r0, r0
-        .endr
-        b     past_zImage
-
-        .word ZIMAGE_MAGIC_NUMBER    /* Magic numbers to help the loader */
-        .word 0x00000000             /* absolute load/run zImage address or
-                                      * 0 for PiC */
-        .word (_end - start)         /* zImage end address */
-
-past_zImage:
-        cpsid aif                    /* Disable all interrupts */
-
-        /* Save the bootloader arguments in less-clobberable registers */
-        mov   r7, r1                 /* r7 := ARM-linux machine type */
-        mov   r8, r2                 /* r8 := ATAG base address */
-
-        /* Find out where we are */
-        ldr   r0, =start
-        adr   r9, start              /* r9  := paddr (start) */
-        sub   r10, r9, r0            /* r10 := phys-offset */
-
-        /* Using the DTB in the .dtb section? */
-#ifdef CONFIG_DTB_FILE
-        ldr   r8, =_sdtb
-        add   r8, r10                /* r8 := paddr(DTB) */
-#endif
-
-        /* Are we the boot CPU? */
-        mov   r12, #0                /* r12 := CPU ID */
-        mrc   CP32(r0, MPIDR)
-        tst   r0, #(1<<31)           /* Multiprocessor extension supported? */
-        beq   boot_cpu
-        tst   r0, #(1<<30)           /* Uniprocessor system? */
-        bne   boot_cpu
-        bics  r12, r0, #(0xff << 24) /* Mask out flags to get CPU ID */
-        beq   boot_cpu               /* If we're CPU 0, boot now */
-
-        /* Non-boot CPUs wait here to be woken up one at a time. */
-1:      dsb
-        ldr   r0, =smp_up_cpu        /* VA of gate */
-        add   r0, r0, r10            /* PA of gate */
-        ldr   r1, [r0]               /* Which CPU is being booted? */
-        teq   r1, r12                /* Is it us? */
-        wfene
-        bne   1b
-
-boot_cpu:
-#ifdef EARLY_UART_ADDRESS
-        ldr   r11, =EARLY_UART_ADDRESS  /* r11 := UART base address */
-        teq   r12, #0                   /* CPU 0 sets up the UART too */
-        bleq  init_uart
-        PRINT("- CPU ")
-        mov   r0, r12
-        bl    putn
-        PRINT(" booting -\r\n")
-#endif
-
-        /* Wake up secondary cpus */
-        teq   r12, #0
-        bleq  kick_cpus
-
-        /* Check that this CPU has Hyp mode */
-        mrc   CP32(r0, ID_PFR1)
-        and   r0, r0, #0xf000        /* Bits 12-15 define virt extensions */
-        teq   r0, #0x1000            /* Must == 0x1 or may be incompatible */
-        beq   1f
-        PRINT("- CPU doesn't support the virtualization extensions -\r\n")
-        b     fail
-1:
-        /* Check if we're already in it */
-        mrs   r0, cpsr
-        and   r0, r0, #0x1f          /* Mode is in the low 5 bits of CPSR */
-        teq   r0, #0x1a              /* Hyp Mode? */
-        bne   1f
-        PRINT("- Started in Hyp mode -\r\n")
-        b     hyp
-1:
-        /* Otherwise, it must have been Secure Supervisor mode */
-        mrc   CP32(r0, SCR)
-        tst   r0, #0x1               /* Not-Secure bit set? */
-        beq   1f
-        PRINT("- CPU is not in Hyp mode or Secure state -\r\n")
-        b     fail
-1:
-        /* OK, we're in Secure state. */
-        PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n")
-        ldr   r0, =enter_hyp_mode    /* VA of function */
-        adr   lr, hyp                /* Set return address for call */
-        add   pc, r0, r10            /* Call PA of function */
-
-hyp:
-
-        /* Zero BSS on the boot CPU to avoid nasty surprises */
-        teq   r12, #0
-        bne   skip_bss
-
-        PRINT("- Zero BSS -\r\n")
-        ldr   r0, =__bss_start       /* Load start & end of bss */
-        ldr   r1, =__bss_end
-        add   r0, r0, r10            /* Apply physical offset */
-        add   r1, r1, r10
-
-        mov   r2, #0
-1:      str   r2, [r0], #4
-        cmp   r0, r1
-        blo   1b
-
-skip_bss:
-
-        PRINT("- Setting up control registers -\r\n")
-
-        /* Read CPU ID */
-        mrc   CP32(r0, MIDR)
-        ldr   r1, =(MIDR_MASK)
-        and   r0, r0, r1
-        /* Is this a Cortex A15? */
-        ldr   r1, =(CORTEX_A15_ID)
-        teq   r0, r1
-        bleq  cortex_a15_init
-
-        /* Set up memory attribute type tables */
-        ldr   r0, =MAIR0VAL
-        ldr   r1, =MAIR1VAL
-        mcr   CP32(r0, MAIR0)
-        mcr   CP32(r1, MAIR1)
-        mcr   CP32(r0, HMAIR0)
-        mcr   CP32(r1, HMAIR1)
-
-        /* Set up the HTCR:
-         * PT walks use Outer-Shareable accesses,
-         * PT walks are write-back, no-write-allocate in both cache levels,
-         * Full 32-bit address space goes through this table. */
-        ldr   r0, =0x80002500
-        mcr   CP32(r0, HTCR)
-
-        /* Set up the HSCTLR:
-         * Exceptions in LE ARM,
-         * Low-latency IRQs disabled,
-         * Write-implies-XN disabled (for now),
-         * D-cache disabled (for now),
-         * I-cache enabled,
-         * Alignment checking enabled,
-         * MMU translation disabled (for now). */
-        ldr   r0, =(HSCTLR_BASE|SCTLR_A)
-        mcr   CP32(r0, HSCTLR)
-
-        /* Write Xen's PT's paddr into the HTTBR */
-        ldr   r4, =xen_pgtable
-        add   r4, r4, r10            /* r4 := paddr (xen_pagetable) */
-        mov   r5, #0                 /* r4:r5 is paddr (xen_pagetable) */
-        mcrr  CP64(r4, r5, HTTBR)
-
-        /* Non-boot CPUs don't need to rebuild the pagetable */
-        teq   r12, #0
-        bne   pt_ready
-
-        /* console fixmap */
-#ifdef EARLY_UART_ADDRESS
-        ldr   r1, =xen_fixmap
-        add   r1, r1, r10            /* r1 := paddr (xen_fixmap) */
-        mov   r3, #0
-        lsr   r2, r11, #12
-        lsl   r2, r2, #12            /* 4K aligned paddr of UART */
-        orr   r2, r2, #PT_UPPER(DEV_L3)
-        orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */
-        strd  r2, r3, [r1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */
-#endif
-
-        /* Build the baseline idle pagetable's first-level entries */
-        ldr   r1, =xen_second
-        add   r1, r1, r10            /* r1 := paddr (xen_second) */
-        mov   r3, #0x0
-        orr   r2, r1, #PT_UPPER(PT)  /* r2:r3 := table map of xen_second */
-        orr   r2, r2, #PT_LOWER(PT)  /* (+ rights for linear PT) */
-        strd  r2, r3, [r4, #0]       /* Map it in slot 0 */
-        add   r2, r2, #0x1000
-        strd  r2, r3, [r4, #8]       /* Map 2nd page in slot 1 */
-        add   r2, r2, #0x1000
-        strd  r2, r3, [r4, #16]      /* Map 3rd page in slot 2 */
-        add   r2, r2, #0x1000
-        strd  r2, r3, [r4, #24]      /* Map 4th page in slot 3 */
-
-        /* Now set up the second-level entries */
-        orr   r2, r9, #PT_UPPER(MEM)
-        orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB normal map of Xen */
-        mov   r4, r9, lsr #18        /* Slot for paddr(start) */
-        strd  r2, r3, [r1, r4]       /* Map Xen there */
-        ldr   r4, =start
-        lsr   r4, #18                /* Slot for vaddr(start) */
-        strd  r2, r3, [r1, r4]       /* Map Xen there too */
-
-        /* xen_fixmap pagetable */
-        ldr   r2, =xen_fixmap
-        add   r2, r2, r10            /* r2 := paddr (xen_fixmap) */
-        orr   r2, r2, #PT_UPPER(PT)
-        orr   r2, r2, #PT_LOWER(PT)  /* r2:r3 := table map of xen_fixmap */
-        add   r4, r4, #8
-        strd  r2, r3, [r1, r4]       /* Map it in the fixmap's slot */
-
-        mov   r3, #0x0
-        lsr   r2, r8, #21
-        lsl   r2, r2, #21            /* 2MB-aligned paddr of DTB */
-        orr   r2, r2, #PT_UPPER(MEM)
-        orr   r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB RAM incl. DTB */
-        add   r4, r4, #8
-        strd  r2, r3, [r1, r4]       /* Map it in the early boot slot */
-
-pt_ready:
-        PRINT("- Turning on paging -\r\n")
-
-        ldr   r1, =paging            /* Explicit vaddr, not RIP-relative */
-        mrc   CP32(r0, HSCTLR)
-        orr   r0, r0, #(SCTLR_M|SCTLR_C) /* Enable MMU and D-cache */
-        dsb                          /* Flush PTE writes and finish reads */
-        mcr   CP32(r0, HSCTLR)       /* now paging is enabled */
-        isb                          /* Now, flush the icache */
-        mov   pc, r1                 /* Get a proper vaddr into PC */
-paging:
-
-
-#ifdef EARLY_UART_ADDRESS
-        /* Use a virtual address to access the UART. */
-        ldr   r11, =FIXMAP_ADDR(FIXMAP_CONSOLE)
-#endif
-
-        PRINT("- Ready -\r\n")
-
-        /* The boot CPU should go straight into C now */
-        teq   r12, #0
-        beq   launch
-
-        /* Non-boot CPUs need to move on to the relocated pagetables */
-        mov   r0, #0
-        ldr   r4, =boot_httbr        /* VA of HTTBR value stashed by CPU 0 */
-        add   r4, r4, r10            /* PA of it */
-        ldrd  r4, r5, [r4]           /* Actual value */
-        dsb
-        mcrr  CP64(r4, r5, HTTBR)
-        dsb
-        isb
-        mcr   CP32(r0, TLBIALLH)     /* Flush hypervisor TLB */
-        mcr   CP32(r0, ICIALLU)      /* Flush I-cache */
-        mcr   CP32(r0, BPIALL)       /* Flush branch predictor */
-        dsb                          /* Ensure completion of TLB+BP flush */
-        isb
-
-        /* Non-boot CPUs report that they've got this far */
-        ldr   r0, =ready_cpus
-1:      ldrex r1, [r0]               /*            { read # of ready CPUs } */
-        add   r1, r1, #1             /* Atomically { ++                   } */
-        strex r2, r1, [r0]           /*            { writeback            } */
-        teq   r2, #0
-        bne   1b
-        dsb
-        mcr   CP32(r0, DCCMVAC)      /* flush D-Cache */
-        dsb
-
-        /* Here, the non-boot CPUs must wait again -- they're now running on
-         * the boot CPU's pagetables so it's safe for the boot CPU to
-         * overwrite the non-relocated copy of Xen.  Once it's done that,
-         * and brought up the memory allocator, non-boot CPUs can get their
-         * own stacks and enter C. */
-1:      wfe
-        dsb
-        ldr   r0, =smp_up_cpu
-        ldr   r1, [r0]               /* Which CPU is being booted? */
-        teq   r1, r12                /* Is it us? */
-        bne   1b
-
-launch:
-        ldr   r0, =init_stack        /* Find the boot-time stack */
-        ldr   sp, [r0]
-        add   sp, #STACK_SIZE        /* (which grows down from the top). */
-        sub   sp, #CPUINFO_sizeof    /* Make room for CPU save record */
-        mov   r0, r10                /* Marshal args: - phys_offset */
-        mov   r1, r7                 /*               - machine type */
-        mov   r2, r8                 /*               - ATAG address */
-        movs  r3, r12                /*               - CPU ID */
-        beq   start_xen              /* and disappear into the land of C */
-        b     start_secondary        /* (to the appropriate entry point) */
-
-/* Fail-stop
- * r0: string explaining why */
-fail:   PRINT("- Boot failed -\r\n")
-1:      wfe
-        b     1b
-
-#ifdef EARLY_UART_ADDRESS
-
-/* Bring up the UART. Specific to the PL011 UART.
- * Clobbers r0-r2 */
-init_uart:
-        mov   r1, #0x0
-        str   r1, [r11, #0x24]       /* -> UARTIBRD (Baud divisor fraction) */
-        mov   r1, #0x4               /* 7.3728MHz / 0x4 == 16 * 115200 */
-        str   r1, [r11, #0x24]       /* -> UARTIBRD (Baud divisor integer) */
-        mov   r1, #0x60              /* 8n1 */
-        str   r1, [r11, #0x24]       /* -> UARTLCR_H (Line control) */
-        ldr   r1, =0x00000301        /* RXE | TXE | UARTEN */
-        str   r1, [r11, #0x30]       /* -> UARTCR (Control Register) */
-        adr   r0, 1f
-        b     puts
-1:      .asciz "- UART enabled -\r\n"
-        .align 4
-
-/* Print early debug messages.  Specific to the PL011 UART.
- * r0: Nul-terminated string to print.
- * Clobbers r0-r2 */
-puts:
-        ldr   r2, [r11, #0x18]       /* <- UARTFR (Flag register) */
-        tst   r2, #0x8               /* Check BUSY bit */
-        bne   puts                   /* Wait for the UART to be ready */
-        ldrb  r2, [r0], #1           /* Load next char */
-        teq   r2, #0                 /* Exit on nul */
-        moveq pc, lr
-        str   r2, [r11]              /* -> UARTDR (Data Register) */
-        b     puts
-
-/* Print a 32-bit number in hex.  Specific to the PL011 UART.
- * r0: Number to print.
- * clobbers r0-r3 */
-putn:
-        adr   r1, hex
-        mov   r3, #8
-1:      ldr   r2, [r11, #0x18]       /* <- UARTFR (Flag register) */
-        tst   r2, #0x8               /* Check BUSY bit */
-        bne   1b                     /* Wait for the UART to be ready */
-        and   r2, r0, #0xf0000000    /* Mask off the top nybble */
-        ldrb  r2, [r1, r2, lsr #28]  /* Convert to a char */
-        str   r2, [r11]              /* -> UARTDR (Data Register) */
-        lsl   r0, #4                 /* Roll it through one nybble at a time */
-        subs  r3, r3, #1
-        bne   1b
-        mov   pc, lr
-
-hex:    .ascii "0123456789abcdef"
-        .align 2
-
-#else  /* EARLY_UART_ADDRESS */
-
-init_uart:
-.global early_puts
-early_puts:
-puts:
-putn:   mov   pc, lr
-
-#endif /* EARLY_UART_ADDRESS */
-
-/*
- * Local variables:
- * mode: ASM
- * indent-tabs-mode: nil
- * End:
- */
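
One step in head.S above that deserves a gloss is the second-level slot
arithmetic: the pagetable uses 8-byte LPAE entries, each mapping 2MB, so the
byte offset of the entry for an address is (addr >> 21) * 8, and because the
image must be loaded 2MB-aligned this collapses to the single "lsr #18" seen
in the code. A standalone C restatement of that identity (the helper name is
illustrative):

    #include <stdint.h>

    /* Sketch: offset into xen_second of the entry mapping `paddr`.
     * For a 2MB-aligned paddr, (paddr >> 21) * 8 == paddr >> 18,
     * which is what "mov r4, r9, lsr #18" computes. */
    static inline uint32_t xen_second_slot_offset(uint32_t paddr)
    {
        return (paddr >> 21) * 8;
    }
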
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/irq.c
--- a/xen/arch/arm/irq.c        Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/irq.c        Wed Dec 19 14:16:30 2012 +0000
@@ -25,7 +25,7 @@
 #include <xen/errno.h>
 #include <xen/sched.h>
 
-#include "gic.h"
+#include <asm/gic.h>
 
 static void enable_none(struct irq_desc *irq) { }
 static unsigned int startup_none(struct irq_desc *irq) { return 0; }
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/Makefile
--- a/xen/arch/arm/lib/Makefile Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-obj-y += memcpy.o memmove.o memset.o memzero.o
-obj-y += findbit.o setbit.o
-obj-y += setbit.o clearbit.o changebit.o
-obj-y += testsetbit.o testclearbit.o testchangebit.o
-obj-y += lib1funcs.o lshrdi3.o div64.o
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/assembler.h
--- a/xen/arch/arm/lib/assembler.h      Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,325 +0,0 @@
-/* From Linux arch/arm/include/asm/assembler.h */
-/*
- *  arch/arm/include/asm/assembler.h
- *
- *  Copyright (C) 1996-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  This file contains arm architecture specific defines
- *  for the different processors.
- *
- *  Do not include any C declarations in this file - it is included by
- *  assembler source.
- */
-#ifndef __ASM_ASSEMBLER_H__
-#define __ASM_ASSEMBLER_H__
-
-#ifndef __ASSEMBLY__
-#error "Only include this from assembly code"
-#endif
-
-// No Thumb, hence:
-#define W(instr)        instr
-#define ARM(instr...)   instr
-#define THUMB(instr...)
-
-#ifdef CONFIG_ARM_UNWIND
-#define UNWIND(code...)         code
-#else
-#define UNWIND(code...)
-#endif
-
-/*
- * Endian independent macros for shifting bytes within registers.
- */
-#ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
-#define get_byte_0      lsl #0
-#define get_byte_1     lsr #8
-#define get_byte_2     lsr #16
-#define get_byte_3     lsr #24
-#define put_byte_0      lsl #0
-#define put_byte_1     lsl #8
-#define put_byte_2     lsl #16
-#define put_byte_3     lsl #24
-#else
-#define pull            lsl
-#define push            lsr
-#define get_byte_0     lsr #24
-#define get_byte_1     lsr #16
-#define get_byte_2     lsr #8
-#define get_byte_3      lsl #0
-#define put_byte_0     lsl #24
-#define put_byte_1     lsl #16
-#define put_byte_2     lsl #8
-#define put_byte_3      lsl #0
-#endif
-
-/*
- * Data preload for architectures that support it
- */
-#if __LINUX_ARM_ARCH__ >= 5
-#define PLD(code...)   code
-#else
-#define PLD(code...)
-#endif
-
-/*
- * This can be used to enable code to cacheline align the destination
- * pointer when bulk writing to memory.  Experiments on StrongARM and
- * XScale didn't show this a worthwhile thing to do when the cache is not
- * set to write-allocate (this would need further testing on XScale when WA
- * is used).
- *
- * On Feroceon there is much to gain however, regardless of cache mode.
- */
-#ifdef CONFIG_CPU_FEROCEON
-#define CALGN(code...) code
-#else
-#define CALGN(code...)
-#endif
-
-/*
- * Enable and disable interrupts
- */
-#if __LINUX_ARM_ARCH__ >= 6
-       .macro  disable_irq_notrace
-       cpsid   i
-       .endm
-
-       .macro  enable_irq_notrace
-       cpsie   i
-       .endm
-#else
-       .macro  disable_irq_notrace
-       msr     cpsr_c, #PSR_I_BIT | SVC_MODE
-       .endm
-
-       .macro  enable_irq_notrace
-       msr     cpsr_c, #SVC_MODE
-       .endm
-#endif
-
-       .macro asm_trace_hardirqs_off
-#if defined(CONFIG_TRACE_IRQFLAGS)
-       stmdb   sp!, {r0-r3, ip, lr}
-       bl      trace_hardirqs_off
-       ldmia   sp!, {r0-r3, ip, lr}
-#endif
-       .endm
-
-       .macro asm_trace_hardirqs_on_cond, cond
-#if defined(CONFIG_TRACE_IRQFLAGS)
-       /*
-        * actually the registers should be pushed and pop'd conditionally, but
-        * after bl the flags are certainly clobbered
-        */
-       stmdb   sp!, {r0-r3, ip, lr}
-       bl\cond trace_hardirqs_on
-       ldmia   sp!, {r0-r3, ip, lr}
-#endif
-       .endm
-
-       .macro asm_trace_hardirqs_on
-       asm_trace_hardirqs_on_cond al
-       .endm
-
-       .macro disable_irq
-       disable_irq_notrace
-       asm_trace_hardirqs_off
-       .endm
-
-       .macro enable_irq
-       asm_trace_hardirqs_on
-       enable_irq_notrace
-       .endm
-/*
- * Save the current IRQ state and disable IRQs.  Note that this macro
- * assumes FIQs are enabled, and that the processor is in SVC mode.
- */
-       .macro  save_and_disable_irqs, oldcpsr
-       mrs     \oldcpsr, cpsr
-       disable_irq
-       .endm
-
-/*
- * Restore interrupt state previously stored in a register.  We don't
- * guarantee that this will preserve the flags.
- */
-       .macro  restore_irqs_notrace, oldcpsr
-       msr     cpsr_c, \oldcpsr
-       .endm
-
-       .macro restore_irqs, oldcpsr
-       tst     \oldcpsr, #PSR_I_BIT
-       asm_trace_hardirqs_on_cond eq
-       restore_irqs_notrace \oldcpsr
-       .endm
-
-#define USER(x...)                             \
-9999:  x;                                      \
-       .pushsection __ex_table,"a";            \
-       .align  3;                              \
-       .long   9999b,9001f;                    \
-       .popsection
-
-#ifdef CONFIG_SMP
-#define ALT_SMP(instr...)                                      \
-9998:  instr
-/*
- * Note: if you get assembler errors from ALT_UP() when building with
- * CONFIG_THUMB2_KERNEL, you almost certainly need to use
- * ALT_SMP( W(instr) ... )
- */
-#define ALT_UP(instr...)                                       \
-       .pushsection ".alt.smp.init", "a"                       ;\
-       .long   9998b                                           ;\
-9997:  instr                                                   ;\
-       .if . - 9997b != 4                                      ;\
-               .error "ALT_UP() content must assemble to exactly 4 bytes";\
-       .endif                                                  ;\
-       .popsection
-#define ALT_UP_B(label)                                        \
-       .equ    up_b_offset, label - 9998b                      ;\
-       .pushsection ".alt.smp.init", "a"                       ;\
-       .long   9998b                                           ;\
-       W(b)    . + up_b_offset                                 ;\
-       .popsection
-#else
-#define ALT_SMP(instr...)
-#define ALT_UP(instr...) instr
-#define ALT_UP_B(label) b label
-#endif
-
-/*
- * Instruction barrier
- */
-       .macro  instr_sync
-#if __LINUX_ARM_ARCH__ >= 7
-       isb
-#elif __LINUX_ARM_ARCH__ == 6
-       mcr     p15, 0, r0, c7, c5, 4
-#endif
-       .endm
-
-/*
- * SMP data memory barrier
- */
-       .macro  smp_dmb mode
-#ifdef CONFIG_SMP
-#if __LINUX_ARM_ARCH__ >= 7
-       .ifeqs "\mode","arm"
-       ALT_SMP(dmb)
-       .else
-       ALT_SMP(W(dmb))
-       .endif
-#elif __LINUX_ARM_ARCH__ == 6
-       ALT_SMP(mcr     p15, 0, r0, c7, c10, 5) @ dmb
-#else
-#error Incompatible SMP platform
-#endif
-       .ifeqs "\mode","arm"
-       ALT_UP(nop)
-       .else
-       ALT_UP(W(nop))
-       .endif
-#endif
-       .endm
-
-#ifdef CONFIG_THUMB2_KERNEL
-       .macro  setmode, mode, reg
-       mov     \reg, #\mode
-       msr     cpsr_c, \reg
-       .endm
-#else
-       .macro  setmode, mode, reg
-       msr     cpsr_c, #\mode
-       .endm
-#endif
-
-/*
- * STRT/LDRT access macros with ARM and Thumb-2 variants
- */
-#ifdef CONFIG_THUMB2_KERNEL
-
-       .macro  usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T()
-9999:
-       .if     \inc == 1
-       \instr\cond\()b\()\t\().w \reg, [\ptr, #\off]
-       .elseif \inc == 4
-       \instr\cond\()\t\().w \reg, [\ptr, #\off]
-       .else
-       .error  "Unsupported inc macro argument"
-       .endif
-
-       .pushsection __ex_table,"a"
-       .align  3
-       .long   9999b, \abort
-       .popsection
-       .endm
-
-       .macro  usracc, instr, reg, ptr, inc, cond, rept, abort
-       @ explicit IT instruction needed because of the label
-       @ introduced by the USER macro
-       .ifnc   \cond,al
-       .if     \rept == 1
-       itt     \cond
-       .elseif \rept == 2
-       ittt    \cond
-       .else
-       .error  "Unsupported rept macro argument"
-       .endif
-       .endif
-
-       @ Slightly optimised to avoid incrementing the pointer twice
-       usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort
-       .if     \rept == 2
-       usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort
-       .endif
-
-       add\cond \ptr, #\rept * \inc
-       .endm
-
-#else  /* !CONFIG_THUMB2_KERNEL */
-
-       .macro  usracc, instr, reg, ptr, inc, cond, rept, abort, t=T()
-       .rept   \rept
-9999:
-       .if     \inc == 1
-       \instr\cond\()b\()\t \reg, [\ptr], #\inc
-       .elseif \inc == 4
-       \instr\cond\()\t \reg, [\ptr], #\inc
-       .else
-       .error  "Unsupported inc macro argument"
-       .endif
-
-       .pushsection __ex_table,"a"
-       .align  3
-       .long   9999b, \abort
-       .popsection
-       .endr
-       .endm
-
-#endif /* CONFIG_THUMB2_KERNEL */
-
-       .macro  strusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
-       usracc  str, \reg, \ptr, \inc, \cond, \rept, \abort
-       .endm
-
-       .macro  ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
-       usracc  ldr, \reg, \ptr, \inc, \cond, \rept, \abort
-       .endm
-
-/* Utility macro for declaring string literals */
-       .macro  string name:req, string
-       .type \name , #object
-\name:
-       .asciz "\string"
-       .size \name , . - \name
-       .endm
-
-#endif /* __ASM_ASSEMBLER_H__ */
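
The pull/push operand-shift macros above (lsr/lsl on little-endian, swapped on
big-endian) let word-misaligned copies be written endian-independently; the
copy_template.S hunk further down recombines two adjacent words with
"mov r3, lr, pull #N; orr r3, r3, r4, push #(32-N)". The little-endian case in
plain C, as a sketch (the function name is illustrative):

    #include <stdint.h>

    /* Sketch: rebuild one aligned word from two adjacent source words
     * when the source is N/8 bytes off alignment (little-endian). */
    static inline uint32_t recombine_le(uint32_t lo, uint32_t hi,
                                        unsigned int pull /* 8, 16 or 24 */)
    {
        return (lo >> pull) | (hi << (32 - pull));
    }
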
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/bitops.h
--- a/xen/arch/arm/lib/bitops.h Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-#include <xen/config.h>
-
-#if __LINUX_ARM_ARCH__ >= 6
-       .macro  bitop, instr
-       ands    ip, r1, #3
-       strneb  r1, [ip]                @ assert word-aligned
-       mov     r2, #1
-       and     r3, r0, #31             @ Get bit offset
-       mov     r0, r0, lsr #5
-       add     r1, r1, r0, lsl #2      @ Get word offset
-       mov     r3, r2, lsl r3
-1:     ldrex   r2, [r1]
-       \instr  r2, r2, r3
-       strex   r0, r2, [r1]
-       cmp     r0, #0
-       bne     1b
-       bx      lr
-       .endm
-
-       .macro  testop, instr, store
-       ands    ip, r1, #3
-       strneb  r1, [ip]                @ assert word-aligned
-       mov     r2, #1
-       and     r3, r0, #31             @ Get bit offset
-       mov     r0, r0, lsr #5
-       add     r1, r1, r0, lsl #2      @ Get word offset
-       mov     r3, r2, lsl r3          @ create mask
-       smp_dmb
-1:     ldrex   r2, [r1]
-       ands    r0, r2, r3              @ save old value of bit
-       \instr  r2, r2, r3              @ toggle bit
-       strex   ip, r2, [r1]
-       cmp     ip, #0
-       bne     1b
-       smp_dmb
-       cmp     r0, #0
-       movne   r0, #1
-2:     bx      lr
-       .endm
-#else
-       .macro  bitop, name, instr
-ENTRY( \name           )
-UNWIND(        .fnstart        )
-       ands    ip, r1, #3
-       strneb  r1, [ip]                @ assert word-aligned
-       and     r2, r0, #31
-       mov     r0, r0, lsr #5
-       mov     r3, #1
-       mov     r3, r3, lsl r2
-       save_and_disable_irqs ip
-       ldr     r2, [r1, r0, lsl #2]
-       \instr  r2, r2, r3
-       str     r2, [r1, r0, lsl #2]
-       restore_irqs ip
-       mov     pc, lr
-UNWIND(        .fnend          )
-ENDPROC(\name          )
-       .endm
-
-/**
- * testop - implement a test_and_xxx_bit operation.
- * @instr: operational instruction
- * @store: store instruction
- *
- * Note: we can trivially conditionalise the store instruction
- * to avoid dirtying the data cache.
- */
-       .macro  testop, name, instr, store
-ENTRY( \name           )
-UNWIND(        .fnstart        )
-       ands    ip, r1, #3
-       strneb  r1, [ip]                @ assert word-aligned
-       and     r3, r0, #31
-       mov     r0, r0, lsr #5
-       save_and_disable_irqs ip
-       ldr     r2, [r1, r0, lsl #2]!
-       mov     r0, #1
-       tst     r2, r0, lsl r3
-       \instr  r2, r2, r0, lsl r3
-       \store  r2, [r1]
-       moveq   r0, #0
-       restore_irqs ip
-       mov     pc, lr
-UNWIND(        .fnend          )
-ENDPROC(\name          )
-       .endm
-#endif
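
The bitop/testop templates above all share the same index arithmetic ahead of
the ldrex/strex retry loop: bit number r0 is split into a word offset
(r0 >> 5) and a mask (1 << (r0 & 31)). A non-atomic C sketch of that
arithmetic only (the real macros get their atomicity from the
exclusive-access loop, which this deliberately omits):

    #include <stdint.h>

    static inline void set_bit_sketch(int nr, volatile uint32_t *addr)
    {
        volatile uint32_t *word = addr + (nr >> 5);  /* word offset */
        uint32_t mask = (uint32_t)1 << (nr & 31);    /* bit within word */
        *word |= mask;  /* the asm wraps this in ldrex/strex */
    }
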
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/changebit.S
--- a/xen/arch/arm/lib/changebit.S      Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-/*
- *  linux/arch/arm/lib/changebit.S
- *
- *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <xen/config.h>
-
-#include "assembler.h"
-#include "bitops.h"
-                .text
-
-ENTRY(_change_bit)
-       bitop   eor
-ENDPROC(_change_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/clearbit.S
--- a/xen/arch/arm/lib/clearbit.S       Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-/*
- *  linux/arch/arm/lib/clearbit.S
- *
- *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <xen/config.h>
-
-#include "assembler.h"
-#include "bitops.h"
-                .text
-
-ENTRY(_clear_bit)
-       bitop   bic
-ENDPROC(_clear_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/copy_template.S
--- a/xen/arch/arm/lib/copy_template.S  Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,267 +0,0 @@
-/*
- *  linux/arch/arm/lib/copy_template.s
- *
- *  Code template for optimized memory copy functions
- *
- *  Author:    Nicolas Pitre
- *  Created:   Sep 28, 2005
- *  Copyright: MontaVista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-/*
- * Theory of operation
- * -------------------
- *
- * This file provides the core code for a forward memory copy used in
- * the implementation of memcpy(), copy_to_user() and copy_from_user().
- *
- * The including file must define the following accessor macros
- * according to the need of the given function:
- *
- * ldr1w ptr reg abort
- *
- *     This loads one word from 'ptr', stores it in 'reg' and increments
- *     'ptr' to the next word. The 'abort' argument is used for fixup tables.
- *
- * ldr4w ptr reg1 reg2 reg3 reg4 abort
- * ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
- *
- *     This loads four or eight words starting from 'ptr', stores them
- *     in provided registers and increments 'ptr' past those words.
- *     The 'abort' argument is used for fixup tables.
- *
- * ldr1b ptr reg cond abort
- *
- *     Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
- *     It also must apply the condition code if provided, otherwise the
- *     "al" condition is assumed by default.
- *
- * str1w ptr reg abort
- * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
- * str1b ptr reg cond abort
- *
- *     Same as their ldr* counterparts, but data is stored to 'ptr' location
- *     rather than being loaded.
- *
- * enter reg1 reg2
- *
- *     Preserve the provided registers on the stack plus any additional
- *     data as needed by the implementation including this code. Called
- *     upon code entry.
- *
- * exit reg1 reg2
- *
- *     Restore registers with the values previously saved with the
- *     'enter' macro. Called upon code termination.
- *
- * LDR1W_SHIFT
- * STR1W_SHIFT
- *
- *     Correction to be applied to the "ip" register when branching into
- *     the ldr1w or str1w instructions (some of these macros may expand to
- *     more than one 32bit instruction in Thumb-2)
- */
-
-
-               enter   r4, lr
-
-               subs    r2, r2, #4
-               blt     8f
-               ands    ip, r0, #3
-       PLD(    pld     [r1, #0]                )
-               bne     9f
-               ands    ip, r1, #3
-               bne     10f
-
-1:             subs    r2, r2, #(28)
-               stmfd   sp!, {r5 - r8}
-               blt     5f
-
-       CALGN(  ands    ip, r0, #31             )
-       CALGN(  rsb     r3, ip, #32             )
-       CALGN(  sbcnes  r4, r3, r2              )  @ C is always set here
-       CALGN(  bcs     2f                      )
-       CALGN(  adr     r4, 6f                  )
-       CALGN(  subs    r2, r2, r3              )  @ C gets set
-       CALGN(  add     pc, r4, ip              )
-
-       PLD(    pld     [r1, #0]                )
-2:     PLD(    subs    r2, r2, #96             )
-       PLD(    pld     [r1, #28]               )
-       PLD(    blt     4f                      )
-       PLD(    pld     [r1, #60]               )
-       PLD(    pld     [r1, #92]               )
-
-3:     PLD(    pld     [r1, #124]              )
-4:             ldr8w   r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
-               subs    r2, r2, #32
-               str8w   r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
-               bge     3b
-       PLD(    cmn     r2, #96                 )
-       PLD(    bge     4b                      )
-
-5:             ands    ip, r2, #28
-               rsb     ip, ip, #32
-#if LDR1W_SHIFT > 0
-               lsl     ip, ip, #LDR1W_SHIFT
-#endif
-               addne   pc, pc, ip              @ C is always clear here
-               b       7f
-6:
-               .rept   (1 << LDR1W_SHIFT)
-               W(nop)
-               .endr
-               ldr1w   r1, r3, abort=20f
-               ldr1w   r1, r4, abort=20f
-               ldr1w   r1, r5, abort=20f
-               ldr1w   r1, r6, abort=20f
-               ldr1w   r1, r7, abort=20f
-               ldr1w   r1, r8, abort=20f
-               ldr1w   r1, lr, abort=20f
-
-#if LDR1W_SHIFT < STR1W_SHIFT
-               lsl     ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
-#elif LDR1W_SHIFT > STR1W_SHIFT
-               lsr     ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
-#endif
-               add     pc, pc, ip
-               nop
-               .rept   (1 << STR1W_SHIFT)
-               W(nop)
-               .endr
-               str1w   r0, r3, abort=20f
-               str1w   r0, r4, abort=20f
-               str1w   r0, r5, abort=20f
-               str1w   r0, r6, abort=20f
-               str1w   r0, r7, abort=20f
-               str1w   r0, r8, abort=20f
-               str1w   r0, lr, abort=20f
-
-       CALGN(  bcs     2b                      )
-
-7:             ldmfd   sp!, {r5 - r8}
-
-8:             movs    r2, r2, lsl #31
-               ldr1b   r1, r3, ne, abort=21f
-               ldr1b   r1, r4, cs, abort=21f
-               ldr1b   r1, ip, cs, abort=21f
-               str1b   r0, r3, ne, abort=21f
-               str1b   r0, r4, cs, abort=21f
-               str1b   r0, ip, cs, abort=21f
-
-               exit    r4, pc
-
-9:             rsb     ip, ip, #4
-               cmp     ip, #2
-               ldr1b   r1, r3, gt, abort=21f
-               ldr1b   r1, r4, ge, abort=21f
-               ldr1b   r1, lr, abort=21f
-               str1b   r0, r3, gt, abort=21f
-               str1b   r0, r4, ge, abort=21f
-               subs    r2, r2, ip
-               str1b   r0, lr, abort=21f
-               blt     8b
-               ands    ip, r1, #3
-               beq     1b
-
-10:            bic     r1, r1, #3
-               cmp     ip, #2
-               ldr1w   r1, lr, abort=21f
-               beq     17f
-               bgt     18f
-
-
-               .macro  forward_copy_shift pull push
-
-               subs    r2, r2, #28
-               blt     14f
-
-       CALGN(  ands    ip, r0, #31             )
-       CALGN(  rsb     ip, ip, #32             )
-       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
-       CALGN(  subcc   r2, r2, ip              )
-       CALGN(  bcc     15f                     )
-
-11:            stmfd   sp!, {r5 - r9}
-
-       PLD(    pld     [r1, #0]                )
-       PLD(    subs    r2, r2, #96             )
-       PLD(    pld     [r1, #28]               )
-       PLD(    blt     13f                     )
-       PLD(    pld     [r1, #60]               )
-       PLD(    pld     [r1, #92]               )
-
-12:    PLD(    pld     [r1, #124]              )
-13:            ldr4w   r1, r4, r5, r6, r7, abort=19f
-               mov     r3, lr, pull #\pull
-               subs    r2, r2, #32
-               ldr4w   r1, r8, r9, ip, lr, abort=19f
-               orr     r3, r3, r4, push #\push
-               mov     r4, r4, pull #\pull
-               orr     r4, r4, r5, push #\push
-               mov     r5, r5, pull #\pull
-               orr     r5, r5, r6, push #\push
-               mov     r6, r6, pull #\pull
-               orr     r6, r6, r7, push #\push
-               mov     r7, r7, pull #\pull
-               orr     r7, r7, r8, push #\push
-               mov     r8, r8, pull #\pull
-               orr     r8, r8, r9, push #\push
-               mov     r9, r9, pull #\pull
-               orr     r9, r9, ip, push #\push
-               mov     ip, ip, pull #\pull
-               orr     ip, ip, lr, push #\push
-               str8w   r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
-               bge     12b
-       PLD(    cmn     r2, #96                 )
-       PLD(    bge     13b                     )
-
-               ldmfd   sp!, {r5 - r9}
-
-14:            ands    ip, r2, #28
-               beq     16f
-
-15:            mov     r3, lr, pull #\pull
-               ldr1w   r1, lr, abort=21f
-               subs    ip, ip, #4
-               orr     r3, r3, lr, push #\push
-               str1w   r0, r3, abort=21f
-               bgt     15b
-       CALGN(  cmp     r2, #0                  )
-       CALGN(  bge     11b                     )
-
-16:            sub     r1, r1, #(\push / 8)
-               b       8b
-
-               .endm
-
-
-               forward_copy_shift      pull=8  push=24
-
-17:            forward_copy_shift      pull=16 push=16
-
-18:            forward_copy_shift      pull=24 push=8
-
-
-/*
- * Abort preamble and completion macros.
- * If a fixup handler is required then those macros must surround it.
- * It is assumed that the fixup code will handle the private part of
- * the exit macro.
- */
-
-       .macro  copy_abort_preamble
-19:    ldmfd   sp!, {r5 - r9}
-       b       21f
-20:    ldmfd   sp!, {r5 - r8}
-21:
-       .endm
-
-       .macro  copy_abort_end
-       ldmfd   sp!, {r4, pc}
-       .endm
-
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/div64.S
--- a/xen/arch/arm/lib/div64.S  Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,211 +0,0 @@
-/*
- *  linux/arch/arm/lib/div64.S
- *
- *  Optimized computation of 64-bit dividend / 32-bit divisor
- *
- *  Author:    Nicolas Pitre
- *  Created:   Oct 5, 2003
- *  Copyright: Monta Vista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#include <xen/config.h>
-#include "assembler.h"
-       
-#ifdef __ARMEB__
-#define xh r0
-#define xl r1
-#define yh r2
-#define yl r3
-#else
-#define xl r0
-#define xh r1
-#define yl r2
-#define yh r3
-#endif
-
-/*
- * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
- *
- * Note: Calling convention is totally non standard for optimal code.
- *       This is meant to be used by do_div() from include/asm/div64.h only.
- *
- * Input parameters:
- *     xh-xl   = dividend (clobbered)
- *     r4      = divisor (preserved)
- *
- * Output values:
- *     yh-yl   = result
- *     xh      = remainder
- *
- * Clobbered regs: xl, ip
- */
-
-ENTRY(__do_div64)
-UNWIND(.fnstart)
-
-       @ Test for easy paths first.
-       subs    ip, r4, #1
-       bls     9f                      @ divisor is 0 or 1
-       tst     ip, r4
-       beq     8f                      @ divisor is power of 2
-
-       @ See if we need to handle upper 32-bit result.
-       cmp     xh, r4
-       mov     yh, #0
-       blo     3f
-
-       @ Align divisor with upper part of dividend.
-       @ The aligned divisor is stored in yl preserving the original.
-       @ The bit position is stored in ip.
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-       clz     yl, r4
-       clz     ip, xh
-       sub     yl, yl, ip
-       mov     ip, #1
-       mov     ip, ip, lsl yl
-       mov     yl, r4, lsl yl
-
-#else
-
-       mov     yl, r4
-       mov     ip, #1
-1:     cmp     yl, #0x80000000
-       cmpcc   yl, xh
-       movcc   yl, yl, lsl #1
-       movcc   ip, ip, lsl #1
-       bcc     1b
-
-#endif
-
-       @ The division loop for needed upper bit positions.
-       @ Break out early if dividend reaches 0.
-2:     cmp     xh, yl
-       orrcs   yh, yh, ip
-       subcss  xh, xh, yl
-       movnes  ip, ip, lsr #1
-       mov     yl, yl, lsr #1
-       bne     2b
-
-       @ See if we need to handle lower 32-bit result.
-3:     cmp     xh, #0
-       mov     yl, #0
-       cmpeq   xl, r4
-       movlo   xh, xl
-       movlo   pc, lr
-
-       @ The division loop for lower bit positions.
-       @ Here we shift remainder bits leftwards rather than moving the
-       @ divisor for comparisons, considering the carry-out bit as well.
-       mov     ip, #0x80000000
-4:     movs    xl, xl, lsl #1
-       adcs    xh, xh, xh
-       beq     6f
-       cmpcc   xh, r4
-5:     orrcs   yl, yl, ip
-       subcs   xh, xh, r4
-       movs    ip, ip, lsr #1
-       bne     4b
-       mov     pc, lr
-
-       @ The top part of remainder became zero.  If carry is set
-       @ (the 33rd bit) this is a false positive so resume the loop.
-       @ Otherwise, if lower part is also null then we are done.
-6:     bcs     5b
-       cmp     xl, #0
-       moveq   pc, lr
-
-       @ We still have remainder bits in the low part.  Bring them up.
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-       clz     xh, xl                  @ we know xh is zero here so...
-       add     xh, xh, #1
-       mov     xl, xl, lsl xh
-       mov     ip, ip, lsr xh
-
-#else
-
-7:     movs    xl, xl, lsl #1
-       mov     ip, ip, lsr #1
-       bcc     7b
-
-#endif
-
-       @ Current remainder is now 1.  It is worthless to compare with
-       @ divisor at this point since divisor can not be smaller than 3 here.
-       @ If possible, branch for another shift in the division loop.
-       @ If no bit position left then we are done.
-       movs    ip, ip, lsr #1
-       mov     xh, #1
-       bne     4b
-       mov     pc, lr
-
-8:     @ Division by a power of 2: determine what that divisor order is
-       @ then simply shift values around
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-       clz     ip, r4
-       rsb     ip, ip, #31
-
-#else
-
-       mov     yl, r4
-       cmp     r4, #(1 << 16)
-       mov     ip, #0
-       movhs   yl, yl, lsr #16
-       movhs   ip, #16
-
-       cmp     yl, #(1 << 8)
-       movhs   yl, yl, lsr #8
-       addhs   ip, ip, #8
-
-       cmp     yl, #(1 << 4)
-       movhs   yl, yl, lsr #4
-       addhs   ip, ip, #4
-
-       cmp     yl, #(1 << 2)
-       addhi   ip, ip, #3
-       addls   ip, ip, yl, lsr #1
-
-#endif
-
-       mov     yh, xh, lsr ip
-       mov     yl, xl, lsr ip
-       rsb     ip, ip, #32
- ARM(  orr     yl, yl, xh, lsl ip      )
- THUMB(        lsl     xh, xh, ip              )
- THUMB(        orr     yl, yl, xh              )
-       mov     xh, xl, lsl ip
-       mov     xh, xh, lsr ip
-       mov     pc, lr
-
-       @ eq -> division by 1: obvious enough...
-9:     moveq   yl, xl
-       moveq   yh, xh
-       moveq   xh, #0
-       moveq   pc, lr
-UNWIND(.fnend)
-
-UNWIND(.fnstart)
-UNWIND(.pad #4)
-UNWIND(.save {lr})
-Ldiv0_64:
-       @ Division by 0:
-       str     lr, [sp, #-8]!
-       bl      __div0
-
-       @ as wrong as it could be...
-       mov     yl, #0
-       mov     yh, #0
-       mov     xh, #0
-       ldr     pc, [sp], #8
-
-UNWIND(.fnend)
-ENDPROC(__do_div64)
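
As the header comment says, __do_div64 exists solely to back a Linux-style
do_div() macro: 64-bit dividend in, 32-bit divisor, quotient and a 32-bit
remainder out, with a deliberately non-standard register convention. A plain-C
statement of that contract (the helper name is illustrative; the assembly
exists so the hypervisor need not pull in libgcc's division):

    #include <stdint.h>

    /* Sketch of the do_div()-style contract: the quotient replaces *n
     * and the remainder is returned. */
    static inline uint32_t div64_sketch(uint64_t *n, uint32_t base)
    {
        uint32_t rem = (uint32_t)(*n % base);
        *n /= base;
        return rem;
    }
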
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/findbit.S
--- a/xen/arch/arm/lib/findbit.S        Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,198 +0,0 @@
-/*
- *  linux/arch/arm/lib/findbit.S
- *
- *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 16th March 2001 - John Ripley <jripley@xxxxxxxxxxxxx>
- *   Fixed so that "size" is an exclusive not an inclusive quantity.
- *   All users of these functions expect exclusive sizes, and may
- *   also call with zero size.
- * Reworked by rmk.
- */
-
-#include <xen/config.h>
-
-#include "assembler.h"
-                .text
-
-/*
- * Purpose  : Find a 'zero' bit
- * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
- */
-ENTRY(_find_first_zero_bit_le)
-               teq     r1, #0  
-               beq     3f
-               mov     r2, #0
-1:
- ARM(          ldrb    r3, [r0, r2, lsr #3]    )
- THUMB(                lsr     r3, r2, #3              )
- THUMB(                ldrb    r3, [r0, r3]            )
-               eors    r3, r3, #0xff           @ invert bits
-               bne     .L_found                @ any now set - found zero bit
-               add     r2, r2, #8              @ next bit pointer
-2:             cmp     r2, r1                  @ any more?
-               blo     1b
-3:             mov     r0, r1                  @ no free bits
-               mov     pc, lr
-ENDPROC(_find_first_zero_bit_le)
-
-/*
- * Purpose  : Find next 'zero' bit
- * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
- */
-ENTRY(_find_next_zero_bit_le)
-               teq     r1, #0
-               beq     3b
-               ands    ip, r2, #7
-               beq     1b                      @ If new byte, goto old routine
- ARM(          ldrb    r3, [r0, r2, lsr #3]    )
- THUMB(                lsr     r3, r2, #3              )
- THUMB(                ldrb    r3, [r0, r3]            )
-               eor     r3, r3, #0xff           @ now looking for a 1 bit
-               movs    r3, r3, lsr ip          @ shift off unused bits
-               bne     .L_found
-               orr     r2, r2, #7              @ if zero, then no bits here
-               add     r2, r2, #1              @ align bit pointer
-               b       2b                      @ loop for next bit
-ENDPROC(_find_next_zero_bit_le)
-
-/*
- * Purpose  : Find a 'one' bit
- * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit);
- */
-ENTRY(_find_first_bit_le)
-               teq     r1, #0  
-               beq     3f
-               mov     r2, #0
-1:
- ARM(          ldrb    r3, [r0, r2, lsr #3]    )
- THUMB(                lsr     r3, r2, #3              )
- THUMB(                ldrb    r3, [r0, r3]            )
-               movs    r3, r3
-               bne     .L_found                @ any now set - found zero bit
-               add     r2, r2, #8              @ next bit pointer
-2:             cmp     r2, r1                  @ any more?
-               blo     1b
-3:             mov     r0, r1                  @ no free bits
-               mov     pc, lr
-ENDPROC(_find_first_bit_le)
-
-/*
- * Purpose  : Find next 'one' bit
- * Prototype: int find_next_bit(void *addr, unsigned int maxbit, int offset)
- */
-ENTRY(_find_next_bit_le)
-               teq     r1, #0
-               beq     3b
-               ands    ip, r2, #7
-               beq     1b                      @ If new byte, goto old routine
- ARM(          ldrb    r3, [r0, r2, lsr #3]    )
- THUMB(                lsr     r3, r2, #3              )
- THUMB(                ldrb    r3, [r0, r3]            )
-               movs    r3, r3, lsr ip          @ shift off unused bits
-               bne     .L_found
-               orr     r2, r2, #7              @ if zero, then no bits here
-               add     r2, r2, #1              @ align bit pointer
-               b       2b                      @ loop for next bit
-ENDPROC(_find_next_bit_le)
-
-#ifdef __ARMEB__
-
-ENTRY(_find_first_zero_bit_be)
-               teq     r1, #0
-               beq     3f
-               mov     r2, #0
-1:             eor     r3, r2, #0x18           @ big endian byte ordering
- ARM(          ldrb    r3, [r0, r3, lsr #3]    )
- THUMB(                lsr     r3, #3                  )
- THUMB(                ldrb    r3, [r0, r3]            )
-               eors    r3, r3, #0xff           @ invert bits
-               bne     .L_found                @ any now set - found zero bit
-               add     r2, r2, #8              @ next bit pointer
-2:             cmp     r2, r1                  @ any more?
-               blo     1b
-3:             mov     r0, r1                  @ no free bits
-               mov     pc, lr
-ENDPROC(_find_first_zero_bit_be)
-
-ENTRY(_find_next_zero_bit_be)
-               teq     r1, #0
-               beq     3b
-               ands    ip, r2, #7
-               beq     1b                      @ If new byte, goto old routine
-               eor     r3, r2, #0x18           @ big endian byte ordering
- ARM(          ldrb    r3, [r0, r3, lsr #3]    )
- THUMB(                lsr     r3, #3                  )
- THUMB(                ldrb    r3, [r0, r3]            )
-               eor     r3, r3, #0xff           @ now looking for a 1 bit
-               movs    r3, r3, lsr ip          @ shift off unused bits
-               bne     .L_found
-               orr     r2, r2, #7              @ if zero, then no bits here
-               add     r2, r2, #1              @ align bit pointer
-               b       2b                      @ loop for next bit
-ENDPROC(_find_next_zero_bit_be)
-
-ENTRY(_find_first_bit_be)
-               teq     r1, #0
-               beq     3f
-               mov     r2, #0
-1:             eor     r3, r2, #0x18           @ big endian byte ordering
- ARM(          ldrb    r3, [r0, r3, lsr #3]    )
- THUMB(                lsr     r3, #3                  )
- THUMB(                ldrb    r3, [r0, r3]            )
-               movs    r3, r3
-               bne     .L_found                @ any now set - found one bit
-               add     r2, r2, #8              @ next bit pointer
-2:             cmp     r2, r1                  @ any more?
-               blo     1b
-3:             mov     r0, r1                  @ no free bits
-               mov     pc, lr
-ENDPROC(_find_first_bit_be)
-
-ENTRY(_find_next_bit_be)
-               teq     r1, #0
-               beq     3b
-               ands    ip, r2, #7
-               beq     1b                      @ If new byte, goto old routine
-               eor     r3, r2, #0x18           @ big endian byte ordering
- ARM(          ldrb    r3, [r0, r3, lsr #3]    )
- THUMB(                lsr     r3, #3                  )
- THUMB(                ldrb    r3, [r0, r3]            )
-               movs    r3, r3, lsr ip          @ shift off unused bits
-               bne     .L_found
-               orr     r2, r2, #7              @ if zero, then no bits here
-               add     r2, r2, #1              @ align bit pointer
-               b       2b                      @ loop for next bit
-ENDPROC(_find_next_bit_be)
-
-#endif
-
-/*
- * One or more bits in the LSB of r3 are assumed to be set.
- */
-.L_found:
-#if __LINUX_ARM_ARCH__ >= 5
-               rsb     r0, r3, #0
-               and     r3, r3, r0
-               clz     r3, r3
-               rsb     r3, r3, #31
-               add     r0, r2, r3
-#else
-               tst     r3, #0x0f
-               addeq   r2, r2, #4
-               movne   r3, r3, lsl #4
-               tst     r3, #0x30
-               addeq   r2, r2, #2
-               movne   r3, r3, lsl #2
-               tst     r3, #0x40
-               addeq   r2, r2, #1
-               mov     r0, r2
-#endif
-               cmp     r1, r0                  @ Clamp to maxbit
-               movlo   r0, r1
-               mov     pc, lr
-
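For reference, the deleted find-bit routines scan the bitmap a byte at a
time (inverting each byte when searching for a zero bit, so that .L_found
can isolate the lowest set bit via clz on ARMv5+ or a shift cascade
otherwise) and clamp the result to maxbit. A minimal C sketch of the same
contract, assuming little-endian bit numbering within bytes (the helper
name is illustrative, not part of the tree):

    /* Returns the index of the first zero bit, or maxbit if all bits
     * below maxbit are set -- "size" is exclusive, per the 2001 fix. */
    static unsigned int c_find_first_zero_bit(const void *addr,
                                              unsigned int maxbit)
    {
        const unsigned char *p = addr;
        unsigned int bit;

        for ( bit = 0; bit < maxbit; bit++ )
            if ( !(p[bit >> 3] & (1u << (bit & 7))) )
                return bit;     /* found a zero bit */
        return maxbit;          /* no free bits */
    }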
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/lib1funcs.S
--- a/xen/arch/arm/lib/lib1funcs.S      Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,389 +0,0 @@
-/*
- * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
- *
- * Author: Nicolas Pitre <nico@xxxxxxxxxxx>
- *   - contributed to gcc-3.4 on Sep 30, 2003
- *   - adapted for the Linux kernel on Oct 2, 2003
- */
-
-/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
-later version.
-
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file into combinations with other programs,
-and to distribute those combinations without any restriction coming
-from the use of this file.  (The General Public License restrictions
-do apply in other respects; for example, they cover modification of
-the file, and distribution when not linked into a combine
-executable.)
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-
-#include <xen/config.h>
-#include "assembler.h"
-
-.macro ARM_DIV_BODY dividend, divisor, result, curbit
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-       clz     \curbit, \divisor
-       clz     \result, \dividend
-       sub     \result, \curbit, \result
-       mov     \curbit, #1
-       mov     \divisor, \divisor, lsl \result
-       mov     \curbit, \curbit, lsl \result
-       mov     \result, #0
-       
-#else
-
-       @ Initially shift the divisor left 3 bits if possible,
-       @ set curbit accordingly.  This allows for curbit to be located
-       @ at the left end of each 4-bit nibble in the division loop
-       @ to save one loop in most cases.
-       tst     \divisor, #0xe0000000
-       moveq   \divisor, \divisor, lsl #3
-       moveq   \curbit, #8
-       movne   \curbit, #1
-
-       @ Unless the divisor is very big, shift it up in multiples of
-       @ four bits, since this is the amount of unwinding in the main
-       @ division loop.  Continue shifting until the divisor is 
-       @ larger than the dividend.
-1:     cmp     \divisor, #0x10000000
-       cmplo   \divisor, \dividend
-       movlo   \divisor, \divisor, lsl #4
-       movlo   \curbit, \curbit, lsl #4
-       blo     1b
-
-       @ For very big divisors, we must shift it a bit at a time, or
-       @ we will be in danger of overflowing.
-1:     cmp     \divisor, #0x80000000
-       cmplo   \divisor, \dividend
-       movlo   \divisor, \divisor, lsl #1
-       movlo   \curbit, \curbit, lsl #1
-       blo     1b
-
-       mov     \result, #0
-
-#endif
-
-       @ Division loop
-1:     cmp     \dividend, \divisor
-       subhs   \dividend, \dividend, \divisor
-       orrhs   \result,   \result,   \curbit
-       cmp     \dividend, \divisor,  lsr #1
-       subhs   \dividend, \dividend, \divisor, lsr #1
-       orrhs   \result,   \result,   \curbit,  lsr #1
-       cmp     \dividend, \divisor,  lsr #2
-       subhs   \dividend, \dividend, \divisor, lsr #2
-       orrhs   \result,   \result,   \curbit,  lsr #2
-       cmp     \dividend, \divisor,  lsr #3
-       subhs   \dividend, \dividend, \divisor, lsr #3
-       orrhs   \result,   \result,   \curbit,  lsr #3
-       cmp     \dividend, #0                   @ Early termination?
-       movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
-       movne   \divisor,  \divisor, lsr #4
-       bne     1b
-
-.endm
-
-
-.macro ARM_DIV2_ORDER divisor, order
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-       clz     \order, \divisor
-       rsb     \order, \order, #31
-
-#else
-
-       cmp     \divisor, #(1 << 16)
-       movhs   \divisor, \divisor, lsr #16
-       movhs   \order, #16
-       movlo   \order, #0
-
-       cmp     \divisor, #(1 << 8)
-       movhs   \divisor, \divisor, lsr #8
-       addhs   \order, \order, #8
-
-       cmp     \divisor, #(1 << 4)
-       movhs   \divisor, \divisor, lsr #4
-       addhs   \order, \order, #4
-
-       cmp     \divisor, #(1 << 2)
-       addhi   \order, \order, #3
-       addls   \order, \order, \divisor, lsr #1
-
-#endif
-
-.endm
-
-
-.macro ARM_MOD_BODY dividend, divisor, order, spare
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-       clz     \order, \divisor
-       clz     \spare, \dividend
-       sub     \order, \order, \spare
-       mov     \divisor, \divisor, lsl \order
-
-#else
-
-       mov     \order, #0
-
-       @ Unless the divisor is very big, shift it up in multiples of
-       @ four bits, since this is the amount of unwinding in the main
-       @ division loop.  Continue shifting until the divisor is 
-       @ larger than the dividend.
-1:     cmp     \divisor, #0x10000000
-       cmplo   \divisor, \dividend
-       movlo   \divisor, \divisor, lsl #4
-       addlo   \order, \order, #4
-       blo     1b
-
-       @ For very big divisors, we must shift it a bit at a time, or
-       @ we will be in danger of overflowing.
-1:     cmp     \divisor, #0x80000000
-       cmplo   \divisor, \dividend
-       movlo   \divisor, \divisor, lsl #1
-       addlo   \order, \order, #1
-       blo     1b
-
-#endif
-
-       @ Perform all needed subtractions to keep only the remainder.
-       @ Do comparisons in batches of 4 first.
-       subs    \order, \order, #3              @ yes, 3 is intended here
-       blt     2f
-
-1:     cmp     \dividend, \divisor
-       subhs   \dividend, \dividend, \divisor
-       cmp     \dividend, \divisor,  lsr #1
-       subhs   \dividend, \dividend, \divisor, lsr #1
-       cmp     \dividend, \divisor,  lsr #2
-       subhs   \dividend, \dividend, \divisor, lsr #2
-       cmp     \dividend, \divisor,  lsr #3
-       subhs   \dividend, \dividend, \divisor, lsr #3
-       cmp     \dividend, #1
-       mov     \divisor, \divisor, lsr #4
-       subges  \order, \order, #4
-       bge     1b
-
-       tst     \order, #3
-       teqne   \dividend, #0
-       beq     5f
-
-       @ Either 1, 2 or 3 comparisons/subtractions are left.
-2:     cmn     \order, #2
-       blt     4f
-       beq     3f
-       cmp     \dividend, \divisor
-       subhs   \dividend, \dividend, \divisor
-       mov     \divisor,  \divisor,  lsr #1
-3:     cmp     \dividend, \divisor
-       subhs   \dividend, \dividend, \divisor
-       mov     \divisor,  \divisor,  lsr #1
-4:     cmp     \dividend, \divisor
-       subhs   \dividend, \dividend, \divisor
-5:
-.endm
-
-
-ENTRY(__udivsi3)
-ENTRY(__aeabi_uidiv)
-UNWIND(.fnstart)
-
-       subs    r2, r1, #1
-       moveq   pc, lr
-       bcc     Ldiv0
-       cmp     r0, r1
-       bls     11f
-       tst     r1, r2
-       beq     12f
-
-       ARM_DIV_BODY r0, r1, r2, r3
-
-       mov     r0, r2
-       mov     pc, lr
-
-11:    moveq   r0, #1
-       movne   r0, #0
-       mov     pc, lr
-
-12:    ARM_DIV2_ORDER r1, r2
-
-       mov     r0, r0, lsr r2
-       mov     pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__udivsi3)
-ENDPROC(__aeabi_uidiv)
-
-ENTRY(__umodsi3)
-UNWIND(.fnstart)
-
-       subs    r2, r1, #1                      @ compare divisor with 1
-       bcc     Ldiv0
-       cmpne   r0, r1                          @ compare dividend with divisor
-       moveq   r0, #0
-       tsthi   r1, r2                          @ see if divisor is power of 2
-       andeq   r0, r0, r2
-       movls   pc, lr
-
-       ARM_MOD_BODY r0, r1, r2, r3
-
-       mov     pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__umodsi3)
-
-ENTRY(__divsi3)
-ENTRY(__aeabi_idiv)
-UNWIND(.fnstart)
-
-       cmp     r1, #0
-       eor     ip, r0, r1                      @ save the sign of the result.
-       beq     Ldiv0
-       rsbmi   r1, r1, #0                      @ loops below use unsigned.
-       subs    r2, r1, #1                      @ division by 1 or -1 ?
-       beq     10f
-       movs    r3, r0
-       rsbmi   r3, r0, #0                      @ positive dividend value
-       cmp     r3, r1
-       bls     11f
-       tst     r1, r2                          @ divisor is power of 2 ?
-       beq     12f
-
-       ARM_DIV_BODY r3, r1, r0, r2
-
-       cmp     ip, #0
-       rsbmi   r0, r0, #0
-       mov     pc, lr
-
-10:    teq     ip, r0                          @ same sign ?
-       rsbmi   r0, r0, #0
-       mov     pc, lr
-
-11:    movlo   r0, #0
-       moveq   r0, ip, asr #31
-       orreq   r0, r0, #1
-       mov     pc, lr
-
-12:    ARM_DIV2_ORDER r1, r2
-
-       cmp     ip, #0
-       mov     r0, r3, lsr r2
-       rsbmi   r0, r0, #0
-       mov     pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__divsi3)
-ENDPROC(__aeabi_idiv)
-
-ENTRY(__modsi3)
-UNWIND(.fnstart)
-
-       cmp     r1, #0
-       beq     Ldiv0
-       rsbmi   r1, r1, #0                      @ loops below use unsigned.
-       movs    ip, r0                          @ preserve sign of dividend
-       rsbmi   r0, r0, #0                      @ if negative make positive
-       subs    r2, r1, #1                      @ compare divisor with 1
-       cmpne   r0, r1                          @ compare dividend with divisor
-       moveq   r0, #0
-       tsthi   r1, r2                          @ see if divisor is power of 2
-       andeq   r0, r0, r2
-       bls     10f
-
-       ARM_MOD_BODY r0, r1, r2, r3
-
-10:    cmp     ip, #0
-       rsbmi   r0, r0, #0
-       mov     pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__modsi3)
-
-#ifdef CONFIG_AEABI
-
-ENTRY(__aeabi_uidivmod)
-UNWIND(.fnstart)
-UNWIND(.save {r0, r1, ip, lr}  )
-
-       stmfd   sp!, {r0, r1, ip, lr}
-       bl      __aeabi_uidiv
-       ldmfd   sp!, {r1, r2, ip, lr}
-       mul     r3, r0, r2
-       sub     r1, r1, r3
-       mov     pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__aeabi_uidivmod)
-
-ENTRY(__aeabi_idivmod)
-UNWIND(.fnstart)
-UNWIND(.save {r0, r1, ip, lr}  )
-       stmfd   sp!, {r0, r1, ip, lr}
-       bl      __aeabi_idiv
-       ldmfd   sp!, {r1, r2, ip, lr}
-       mul     r3, r0, r2
-       sub     r1, r1, r3
-       mov     pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__aeabi_idivmod)
-
-ENTRY(__aeabi_uldivmod)
-UNWIND(.fnstart)
-UNWIND(.save {lr}      )
-       sub sp, sp, #8
-       stmfd   sp!, {sp, lr}
-       bl __qdivrem
-       ldr lr, [sp, #4]
-       add sp, sp, #8
-       ldmfd sp!, {r2, r3}
-       mov     pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__aeabi_uldivmod)
-
-ENTRY(__aeabi_ldivmod)
-UNWIND(.fnstart)
-UNWIND(.save {lr}      )
-       sub sp, sp, #16
-       stmfd   sp!, {sp, lr}
-       bl __ldivmod_helper
-       ldr lr, [sp, #4]
-       add sp, sp, #16
-       ldmfd   sp!, {r2, r3}
-       mov     pc, lr
-       
-UNWIND(.fnend)
-ENDPROC(__aeabi_ldivmod)
-#endif
-
-Ldiv0:
-UNWIND(.fnstart)
-UNWIND(.pad #4)
-UNWIND(.save {lr})
-       str     lr, [sp, #-8]!
-       bl      __div0
-       mov     r0, #0                  @ About as wrong as it could be.
-       ldr     pc, [sp], #8
-UNWIND(.fnend)
-ENDPROC(Ldiv0)
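The division helpers above implement classic shift-and-subtract
(restoring) division: ARM_DIV_BODY aligns the divisor with the dividend,
then subtracts shifted copies four bits per unrolled iteration, and the
__aeabi_*divmod wrappers recover the remainder as dividend minus
quotient times divisor. A bit-at-a-time C sketch of the same algorithm
(no unrolling or clz shortcut; the function name is illustrative):

    static unsigned int c_udiv(unsigned int dividend, unsigned int divisor,
                               unsigned int *remainder)
    {
        unsigned int curbit = 1, result = 0;

        if ( divisor == 0 )           /* the asm branches to Ldiv0 here */
        {
            *remainder = dividend;
            return 0;
        }

        /* Shift the divisor up until it covers the dividend, tracking
         * the corresponding quotient bit in curbit. */
        while ( divisor < dividend && !(divisor & 0x80000000u) )
        {
            divisor <<= 1;
            curbit <<= 1;
        }

        /* Subtract off shifted copies of the divisor. */
        while ( curbit != 0 )
        {
            if ( dividend >= divisor )
            {
                dividend -= divisor;
                result |= curbit;
            }
            divisor >>= 1;
            curbit >>= 1;
        }

        *remainder = dividend;  /* __aeabi_uidivmod returns this in r1 */
        return result;
    }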
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/lshrdi3.S
--- a/xen/arch/arm/lib/lshrdi3.S        Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
-   Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
-later version.
-
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file into combinations with other programs,
-and to distribute those combinations without any restriction coming
-from the use of this file.  (The General Public License restrictions
-do apply in other respects; for example, they cover modification of
-the file, and distribution when not linked into a combine
-executable.)
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA.  */
-
-
-#include <xen/config.h>
-#include "assembler.h"
-
-#ifdef __ARMEB__
-#define al r1
-#define ah r0
-#else
-#define al r0
-#define ah r1
-#endif
-
-ENTRY(__lshrdi3)
-ENTRY(__aeabi_llsr)
-
-       subs    r3, r2, #32
-       rsb     ip, r2, #32
-       movmi   al, al, lsr r2
-       movpl   al, ah, lsr r3
- ARM(  orrmi   al, al, ah, lsl ip      )
- THUMB(        lslmi   r3, ah, ip              )
- THUMB(        orrmi   al, al, r3              )
-       mov     ah, ah, lsr r2
-       mov     pc, lr
-
-ENDPROC(__lshrdi3)
-ENDPROC(__aeabi_llsr)
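__lshrdi3/__aeabi_llsr shift a 64-bit quantity held in an r0/r1 pair
right by r2 bits, splicing high-word bits into the low word for shifts
below 32 and moving them straight down otherwise. The same logic in C,
over an explicit two-word representation (shift assumed below 64; the
function name is illustrative):

    static unsigned long long c_llsr(unsigned int al, unsigned int ah,
                                     unsigned int shift)
    {
        unsigned int lo, hi;

        if ( shift == 0 )
        {
            lo = al;
            hi = ah;
        }
        else if ( shift < 32 )
        {
            lo = (al >> shift) | (ah << (32 - shift));  /* splice */
            hi = ah >> shift;
        }
        else
        {
            lo = ah >> (shift - 32);  /* whole words shifted out */
            hi = 0;
        }
        return ((unsigned long long)hi << 32) | lo;
    }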
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/memcpy.S
--- a/xen/arch/arm/lib/memcpy.S Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-/*
- *  linux/arch/arm/lib/memcpy.S
- *
- *  Author:    Nicolas Pitre
- *  Created:   Sep 28, 2005
- *  Copyright: MontaVista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#include <xen/config.h>
-#include "assembler.h"
-
-#define LDR1W_SHIFT    0
-#define STR1W_SHIFT    0
-
-       .macro ldr1w ptr reg abort
-       W(ldr) \reg, [\ptr], #4
-       .endm
-
-       .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
-       ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
-       .endm
-
-       .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-       ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
-       .endm
-
-       .macro ldr1b ptr reg cond=al abort
-       ldr\cond\()b \reg, [\ptr], #1
-       .endm
-
-       .macro str1w ptr reg abort
-       W(str) \reg, [\ptr], #4
-       .endm
-
-       .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-       stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
-       .endm
-
-       .macro str1b ptr reg cond=al abort
-       str\cond\()b \reg, [\ptr], #1
-       .endm
-
-       .macro enter reg1 reg2
-       stmdb sp!, {r0, \reg1, \reg2}
-       .endm
-
-       .macro exit reg1 reg2
-       ldmfd sp!, {r0, \reg1, \reg2}
-       .endm
-
-       .text
-
-/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
-
-ENTRY(memcpy)
-
-#include "copy_template.S"
-
-ENDPROC(memcpy)
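memcpy itself is generated from copy_template.S, which the ldr1w/str1w
family of macros above parameterises on how to move one word, four or
eight words, or one byte with post-increment. Stripped of the alignment
and unrolling machinery, the template's core reduces to a word-at-a-time
copy with a byte tail, roughly (helper name illustrative):

    #include <stddef.h>
    #include <stdint.h>

    /* No overlap handling -- that is memmove's job. */
    static void *c_memcpy(void *dest, const void *src, size_t n)
    {
        unsigned char *d = dest;
        const unsigned char *s = src;

        /* Bulk word copies while both pointers stay aligned (ldr1w/str1w). */
        while ( n >= 4 && !((uintptr_t)d & 3) && !((uintptr_t)s & 3) )
        {
            *(unsigned int *)d = *(const unsigned int *)s;
            d += 4; s += 4; n -= 4;
        }
        while ( n-- )               /* byte tail (ldr1b/str1b) */
            *d++ = *s++;
        return dest;
    }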
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/memmove.S
--- a/xen/arch/arm/lib/memmove.S        Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,200 +0,0 @@
-/*
- *  linux/arch/arm/lib/memmove.S
- *
- *  Author:    Nicolas Pitre
- *  Created:   Sep 28, 2005
- *  Copyright: (C) MontaVista Software Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#include <xen/config.h>
-
-#include "assembler.h"
-
-               .text
-
-/*
- * Prototype: void *memmove(void *dest, const void *src, size_t n);
- *
- * Note:
- *
- * If the memory regions don't overlap, we simply branch to memcpy which is
- * normally a bit faster. Otherwise the copy is done going downwards.  This
- * is a transposition of the code from copy_template.S but with the copy
- * occurring in the opposite direction.
- */
-
-ENTRY(memmove)
-
-               subs    ip, r0, r1
-               cmphi   r2, ip
-               bls     memcpy
-
-               stmfd   sp!, {r0, r4, lr}
-               add     r1, r1, r2
-               add     r0, r0, r2
-               subs    r2, r2, #4
-               blt     8f
-               ands    ip, r0, #3
-       PLD(    pld     [r1, #-4]               )
-               bne     9f
-               ands    ip, r1, #3
-               bne     10f
-
-1:             subs    r2, r2, #(28)
-               stmfd   sp!, {r5 - r8}
-               blt     5f
-
-       CALGN(  ands    ip, r0, #31             )
-       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
-       CALGN(  bcs     2f                      )
-       CALGN(  adr     r4, 6f                  )
-       CALGN(  subs    r2, r2, ip              )  @ C is set here
-       CALGN(  rsb     ip, ip, #32             )
-       CALGN(  add     pc, r4, ip              )
-
-       PLD(    pld     [r1, #-4]               )
-2:     PLD(    subs    r2, r2, #96             )
-       PLD(    pld     [r1, #-32]              )
-       PLD(    blt     4f                      )
-       PLD(    pld     [r1, #-64]              )
-       PLD(    pld     [r1, #-96]              )
-
-3:     PLD(    pld     [r1, #-128]             )
-4:             ldmdb   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
-               subs    r2, r2, #32
-               stmdb   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
-               bge     3b
-       PLD(    cmn     r2, #96                 )
-       PLD(    bge     4b                      )
-
-5:             ands    ip, r2, #28
-               rsb     ip, ip, #32
-               addne   pc, pc, ip              @ C is always clear here
-               b       7f
-6:             W(nop)
-               W(ldr)  r3, [r1, #-4]!
-               W(ldr)  r4, [r1, #-4]!
-               W(ldr)  r5, [r1, #-4]!
-               W(ldr)  r6, [r1, #-4]!
-               W(ldr)  r7, [r1, #-4]!
-               W(ldr)  r8, [r1, #-4]!
-               W(ldr)  lr, [r1, #-4]!
-
-               add     pc, pc, ip
-               nop
-               W(nop)
-               W(str)  r3, [r0, #-4]!
-               W(str)  r4, [r0, #-4]!
-               W(str)  r5, [r0, #-4]!
-               W(str)  r6, [r0, #-4]!
-               W(str)  r7, [r0, #-4]!
-               W(str)  r8, [r0, #-4]!
-               W(str)  lr, [r0, #-4]!
-
-       CALGN(  bcs     2b                      )
-
-7:             ldmfd   sp!, {r5 - r8}
-
-8:             movs    r2, r2, lsl #31
-               ldrneb  r3, [r1, #-1]!
-               ldrcsb  r4, [r1, #-1]!
-               ldrcsb  ip, [r1, #-1]
-               strneb  r3, [r0, #-1]!
-               strcsb  r4, [r0, #-1]!
-               strcsb  ip, [r0, #-1]
-               ldmfd   sp!, {r0, r4, pc}
-
-9:             cmp     ip, #2
-               ldrgtb  r3, [r1, #-1]!
-               ldrgeb  r4, [r1, #-1]!
-               ldrb    lr, [r1, #-1]!
-               strgtb  r3, [r0, #-1]!
-               strgeb  r4, [r0, #-1]!
-               subs    r2, r2, ip
-               strb    lr, [r0, #-1]!
-               blt     8b
-               ands    ip, r1, #3
-               beq     1b
-
-10:            bic     r1, r1, #3
-               cmp     ip, #2
-               ldr     r3, [r1, #0]
-               beq     17f
-               blt     18f
-
-
-               .macro  backward_copy_shift push pull
-
-               subs    r2, r2, #28
-               blt     14f
-
-       CALGN(  ands    ip, r0, #31             )
-       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
-       CALGN(  subcc   r2, r2, ip              )
-       CALGN(  bcc     15f                     )
-
-11:            stmfd   sp!, {r5 - r9}
-
-       PLD(    pld     [r1, #-4]               )
-       PLD(    subs    r2, r2, #96             )
-       PLD(    pld     [r1, #-32]              )
-       PLD(    blt     13f                     )
-       PLD(    pld     [r1, #-64]              )
-       PLD(    pld     [r1, #-96]              )
-
-12:    PLD(    pld     [r1, #-128]             )
-13:            ldmdb   r1!, {r7, r8, r9, ip}
-               mov     lr, r3, push #\push
-               subs    r2, r2, #32
-               ldmdb   r1!, {r3, r4, r5, r6}
-               orr     lr, lr, ip, pull #\pull
-               mov     ip, ip, push #\push
-               orr     ip, ip, r9, pull #\pull
-               mov     r9, r9, push #\push
-               orr     r9, r9, r8, pull #\pull
-               mov     r8, r8, push #\push
-               orr     r8, r8, r7, pull #\pull
-               mov     r7, r7, push #\push
-               orr     r7, r7, r6, pull #\pull
-               mov     r6, r6, push #\push
-               orr     r6, r6, r5, pull #\pull
-               mov     r5, r5, push #\push
-               orr     r5, r5, r4, pull #\pull
-               mov     r4, r4, push #\push
-               orr     r4, r4, r3, pull #\pull
-               stmdb   r0!, {r4 - r9, ip, lr}
-               bge     12b
-       PLD(    cmn     r2, #96                 )
-       PLD(    bge     13b                     )
-
-               ldmfd   sp!, {r5 - r9}
-
-14:            ands    ip, r2, #28
-               beq     16f
-
-15:            mov     lr, r3, push #\push
-               ldr     r3, [r1, #-4]!
-               subs    ip, ip, #4
-               orr     lr, lr, r3, pull #\pull
-               str     lr, [r0, #-4]!
-               bgt     15b
-       CALGN(  cmp     r2, #0                  )
-       CALGN(  bge     11b                     )
-
-16:            add     r1, r1, #(\pull / 8)
-               b       8b
-
-               .endm
-
-
-               backward_copy_shift     push=8  pull=24
-
-17:            backward_copy_shift     push=16 pull=16
-
-18:            backward_copy_shift     push=24 pull=8
-
-ENDPROC(memmove)
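The memmove entry test (subs ip, r0, r1; cmphi r2, ip; bls memcpy)
computes dest - src and takes the backward path only when that
difference is unsigned-less-than n, i.e. when the destination overlaps
the tail of the source; every other case is safely delegated to the
faster forward memcpy. The dispatch in C (helper name illustrative):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void *c_memmove(void *dest, const void *src, size_t n)
    {
        unsigned char *d = dest;
        const unsigned char *s = src;

        /* Only 0 < dest - src < n (unsigned) needs a downward copy;
         * everything else can go forwards. */
        if ( (uintptr_t)d - (uintptr_t)s >= n )
            return memcpy(dest, src, n);

        d += n;                     /* copy downwards, as above */
        s += n;
        while ( n-- )
            *--d = *--s;
        return dest;
    }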
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/memset.S
--- a/xen/arch/arm/lib/memset.S Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,129 +0,0 @@
-/*
- *  linux/arch/arm/lib/memset.S
- *
- *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  ASM optimised string functions
- */
-
-#include <xen/config.h>
-
-#include "assembler.h"
-
-       .text
-       .align  5
-       .word   0
-
-1:     subs    r2, r2, #4              @ 1 do we have enough
-       blt     5f                      @ 1 bytes to align with?
-       cmp     r3, #2                  @ 1
-       strltb  r1, [r0], #1            @ 1
-       strleb  r1, [r0], #1            @ 1
-       strb    r1, [r0], #1            @ 1
-       add     r2, r2, r3              @ 1 (r2 = r2 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted.  Try doing the
- * memset again.
- */
-
-ENTRY(memset)
-       ands    r3, r0, #3              @ 1 unaligned?
-       bne     1b                      @ 1
-/*
- * we know that the pointer in r0 is aligned to a word boundary.
- */
-       orr     r1, r1, r1, lsl #8
-       orr     r1, r1, r1, lsl #16
-       mov     r3, r1
-       cmp     r2, #16
-       blt     4f
-
-#if ! CALGN(1)+0
-
-/*
- * We need an extra register for this loop - save the return address and
- * use the LR
- */
-       str     lr, [sp, #-4]!
-       mov     ip, r1
-       mov     lr, r1
-
-2:     subs    r2, r2, #64
-       stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
-       stmgeia r0!, {r1, r3, ip, lr}
-       stmgeia r0!, {r1, r3, ip, lr}
-       stmgeia r0!, {r1, r3, ip, lr}
-       bgt     2b
-       ldmeqfd sp!, {pc}               @ Now <64 bytes to go.
-/*
- * No need to correct the count; we're only testing bits from now on
- */
-       tst     r2, #32
-       stmneia r0!, {r1, r3, ip, lr}
-       stmneia r0!, {r1, r3, ip, lr}
-       tst     r2, #16
-       stmneia r0!, {r1, r3, ip, lr}
-       ldr     lr, [sp], #4
-
-#else
-
-/*
- * This version aligns the destination pointer in order to write
- * whole cache lines at once.
- */
-
-       stmfd   sp!, {r4-r7, lr}
-       mov     r4, r1
-       mov     r5, r1
-       mov     r6, r1
-       mov     r7, r1
-       mov     ip, r1
-       mov     lr, r1
-
-       cmp     r2, #96
-       tstgt   r0, #31
-       ble     3f
-
-       and     ip, r0, #31
-       rsb     ip, ip, #32
-       sub     r2, r2, ip
-       movs    ip, ip, lsl #(32 - 4)
-       stmcsia r0!, {r4, r5, r6, r7}
-       stmmiia r0!, {r4, r5}
-       tst     ip, #(1 << 30)
-       mov     ip, r1
-       strne   r1, [r0], #4
-
-3:     subs    r2, r2, #64
-       stmgeia r0!, {r1, r3-r7, ip, lr}
-       stmgeia r0!, {r1, r3-r7, ip, lr}
-       bgt     3b
-       ldmeqfd sp!, {r4-r7, pc}
-
-       tst     r2, #32
-       stmneia r0!, {r1, r3-r7, ip, lr}
-       tst     r2, #16
-       stmneia r0!, {r4-r7}
-       ldmfd   sp!, {r4-r7, lr}
-
-#endif
-
-4:     tst     r2, #8
-       stmneia r0!, {r1, r3}
-       tst     r2, #4
-       strne   r1, [r0], #4
-/*
- * When we get here, we've got less than 4 bytes to set.  We
- * may have an unaligned pointer as well.
- */
-5:     tst     r2, #2
-       strneb  r1, [r0], #1
-       strneb  r1, [r0], #1
-       tst     r2, #1
-       strneb  r1, [r0], #1
-       mov     pc, lr
-ENDPROC(memset)
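Before its bulk loops, memset replicates the fill byte across a full
word with two orr ... lsl steps so that stm can then store it four or
more bytes at a time. The replication trick in C (helper name
illustrative):

    /* 000000cc -> 0000cccc -> cccccccc, as done by the two
     * "orr r1, r1, r1, lsl #n" instructions above. */
    static unsigned int replicate_byte(unsigned char c)
    {
        unsigned int w = c;

        w |= w << 8;
        w |= w << 16;
        return w;
    }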
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/memzero.S
--- a/xen/arch/arm/lib/memzero.S        Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-/*
- *  linux/arch/arm/lib/memzero.S
- *
- *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <xen/config.h>
-
-#include "assembler.h"
-
-       .text
-       .align  5
-       .word   0
-/*
- * Align the pointer in r0.  r3 contains the number of bytes that we are
- * mis-aligned by, and r1 is the number of bytes.  If r1 < 4, then we
- * don't bother; we use byte stores instead.
- */
-1:     subs    r1, r1, #4              @ 1 do we have enough
-       blt     5f                      @ 1 bytes to align with?
-       cmp     r3, #2                  @ 1
-       strltb  r2, [r0], #1            @ 1
-       strleb  r2, [r0], #1            @ 1
-       strb    r2, [r0], #1            @ 1
-       add     r1, r1, r3              @ 1 (r1 = r1 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted.  Try doing the
- * memzero again.
- */
-
-ENTRY(__memzero)
-       mov     r2, #0                  @ 1
-       ands    r3, r0, #3              @ 1 unaligned?
-       bne     1b                      @ 1
-/*
- * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
- */
-       cmp     r1, #16                 @ 1 we can skip this chunk if we
-       blt     4f                      @ 1 have < 16 bytes
-
-#if ! CALGN(1)+0
-
-/*
- * We need an extra register for this loop - save the return address and
- * use the LR
- */
-       str     lr, [sp, #-4]!          @ 1
-       mov     ip, r2                  @ 1
-       mov     lr, r2                  @ 1
-
-3:     subs    r1, r1, #64             @ 1 write 64 bytes out per loop
-       stmgeia r0!, {r2, r3, ip, lr}   @ 4
-       stmgeia r0!, {r2, r3, ip, lr}   @ 4
-       stmgeia r0!, {r2, r3, ip, lr}   @ 4
-       stmgeia r0!, {r2, r3, ip, lr}   @ 4
-       bgt     3b                      @ 1
-       ldmeqfd sp!, {pc}               @ 1/2 quick exit
-/*
- * No need to correct the count; we're only testing bits from now on
- */
-       tst     r1, #32                 @ 1
-       stmneia r0!, {r2, r3, ip, lr}   @ 4
-       stmneia r0!, {r2, r3, ip, lr}   @ 4
-       tst     r1, #16                 @ 1 16 bytes or more?
-       stmneia r0!, {r2, r3, ip, lr}   @ 4
-       ldr     lr, [sp], #4            @ 1
-
-#else
-
-/*
- * This version aligns the destination pointer in order to write
- * whole cache lines at once.
- */
-
-       stmfd   sp!, {r4-r7, lr}
-       mov     r4, r2
-       mov     r5, r2
-       mov     r6, r2
-       mov     r7, r2
-       mov     ip, r2
-       mov     lr, r2
-
-       cmp     r1, #96
-       andgts  ip, r0, #31
-       ble     3f
-
-       rsb     ip, ip, #32
-       sub     r1, r1, ip
-       movs    ip, ip, lsl #(32 - 4)
-       stmcsia r0!, {r4, r5, r6, r7}
-       stmmiia r0!, {r4, r5}
-       movs    ip, ip, lsl #2
-       strcs   r2, [r0], #4
-
-3:     subs    r1, r1, #64
-       stmgeia r0!, {r2-r7, ip, lr}
-       stmgeia r0!, {r2-r7, ip, lr}
-       bgt     3b
-       ldmeqfd sp!, {r4-r7, pc}
-
-       tst     r1, #32
-       stmneia r0!, {r2-r7, ip, lr}
-       tst     r1, #16
-       stmneia r0!, {r4-r7}
-       ldmfd   sp!, {r4-r7, lr}
-
-#endif
-
-4:     tst     r1, #8                  @ 1 8 bytes or more?
-       stmneia r0!, {r2, r3}           @ 2
-       tst     r1, #4                  @ 1 4 bytes or more?
-       strne   r2, [r0], #4            @ 1
-/*
- * When we get here, we've got less than 4 bytes to zero.  We
- * may have an unaligned pointer as well.
- */
-5:     tst     r1, #2                  @ 1 2 bytes or more?
-       strneb  r2, [r0], #1            @ 1
-       strneb  r2, [r0], #1            @ 1
-       tst     r1, #1                  @ 1 a byte left over
-       strneb  r2, [r0], #1            @ 1
-       mov     pc, lr                  @ 1
-ENDPROC(__memzero)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/setbit.S
--- a/xen/arch/arm/lib/setbit.S Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-/*
- *  linux/arch/arm/lib/setbit.S
- *
- *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <xen/config.h>
-
-#include "assembler.h"
-#include "bitops.h"
-       .text
-
-ENTRY(_set_bit)
-       bitop   orr
-ENDPROC(_set_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/testchangebit.S
--- a/xen/arch/arm/lib/testchangebit.S  Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-/*
- *  linux/arch/arm/lib/testchangebit.S
- *
- *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <xen/config.h>
-
-#include "assembler.h"
-#include "bitops.h"
-                .text
-
-ENTRY(_test_and_change_bit)
-       testop  eor, str
-ENDPROC(_test_and_change_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/testclearbit.S
--- a/xen/arch/arm/lib/testclearbit.S   Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-/*
- *  linux/arch/arm/lib/testclearbit.S
- *
- *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <xen/config.h>
-
-#include "assembler.h"
-#include "bitops.h"
-                .text
-
-ENTRY(_test_and_clear_bit)
-       testop  bicne, strne
-ENDPROC(_test_and_clear_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/lib/testsetbit.S
--- a/xen/arch/arm/lib/testsetbit.S     Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-/*
- *  linux/arch/arm/lib/testsetbit.S
- *
- *  Copyright (C) 1995-1996 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <xen/config.h>
-
-#include "assembler.h"
-#include "bitops.h"
-                .text
-
-ENTRY(_test_and_set_bit)
-       testop  orreq, streq
-ENDPROC(_test_and_set_bit)
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/mode_switch.S
--- a/xen/arch/arm/mode_switch.S        Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-/*
- * xen/arch/arm/mode_switch.S
- *
- * Start-of-day code to take a CPU from Secure mode to Hyp mode.
- *
- * Tim Deegan <tim@xxxxxxx>
- * Copyright (c) 2011-2012 Citrix Systems.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <asm/config.h>
-#include <asm/page.h>
-#include <asm/platform_vexpress.h>
-#include <asm/asm_defns.h>
-#include "gic.h"
-
-
-/* XXX: Versatile Express specific code */
-/* wake up secondary cpus */
-.globl kick_cpus
-kick_cpus:
-        /* write start paddr to v2m sysreg FLAGSSET register */
-        ldr   r0, =(V2M_SYS_MMIO_BASE)        /* base V2M sysreg MMIO address */
-        dsb
-        mov   r2, #0xffffffff
-        str   r2, [r0, #(V2M_SYS_FLAGSCLR)]
-        dsb
-        ldr   r2, =start
-        add   r2, r2, r10
-        str   r2, [r0, #(V2M_SYS_FLAGSSET)]
-        dsb
-        /* send an interrupt */
-        ldr   r0, =(GIC_BASE_ADDRESS + GIC_DR_OFFSET) /* base GICD MMIO address */
-        mov   r2, #0x1
-        str   r2, [r0, #(GICD_CTLR * 4)]      /* enable distributor */
-        mov   r2, #0xfe0000
-        str   r2, [r0, #(GICD_SGIR * 4)]      /* send IPI to everybody */
-        dsb
-        str   r2, [r0, #(GICD_CTLR * 4)]      /* disable distributor */
-        mov   pc, lr
-
-
-/* Get up a CPU into Hyp mode.  Clobbers r0-r3.
- *
- * Expects r12 == CPU number
- *
- * This code is specific to the VE model, and not intended to be used
- * on production systems.  As such it's a bit hackier than the main
- * boot code in head.S.  In future it will be replaced by better
- * integration with the bootloader/firmware so that Xen always starts
- * in Hyp mode. */
-
-.globl enter_hyp_mode
-enter_hyp_mode:
-        mov   r3, lr                 /* Put return address in non-banked reg */
-        cpsid aif, #0x16             /* Enter Monitor mode */
-        mrc   CP32(r0, SCR)
-        orr   r0, r0, #0x100         /* Set HCE */
-        orr   r0, r0, #0xb1          /* Set SCD, AW, FW and NS */
-        bic   r0, r0, #0xe           /* Clear EA, FIQ and IRQ */
-        mcr   CP32(r0, SCR)
-        /* Ugly: the system timer's frequency register is only
-         * programmable in Secure state.  Since we don't know where its
-         * memory-mapped control registers live, we can't find out the
-         * right frequency.  Use the VE model's default frequency here. */
-        ldr   r0, =0x5f5e100         /* 100 MHz */
-        mcr   CP32(r0, CNTFRQ)
-        ldr   r0, =0x40c00           /* SMP, c11, c10 in non-secure mode */
-        mcr   CP32(r0, NSACR)
-        mov   r0, #GIC_BASE_ADDRESS
-        add   r0, r0, #GIC_DR_OFFSET
-        /* Disable the GIC distributor, on the boot CPU only */
-        mov   r1, #0
-        teq   r12, #0                /* Is this the boot CPU? */
-        streq r1, [r0]
-        /* Continuing ugliness: Set up the GIC so NS state owns interrupts.
-         * The first 32 interrupts (SGIs & PPIs) must be configured on all
-         * CPUs while the remainder are SPIs and only need to be done once,
-         * on the boot CPU. */
-        add   r0, r0, #0x80          /* GICD_IGROUP0 */
-        mov   r2, #0xffffffff        /* All interrupts to group 1 */
-        teq   r12, #0                /* Boot CPU? */
-        str   r2, [r0]               /* Interrupts  0-31 (SGI & PPI) */
-        streq r2, [r0, #4]           /* Interrupts 32-63 (SPI) */
-        streq r2, [r0, #8]           /* Interrupts 64-95 (SPI) */
-        /* Disable the GIC CPU interface on all processors */
-        mov   r0, #GIC_BASE_ADDRESS
-        add   r0, r0, #GIC_CR_OFFSET
-        mov   r1, #0
-        str   r1, [r0]
-        /* Must drop priority mask below 0x80 before entering NS state */
-        ldr   r1, =0xff
-        str   r1, [r0, #0x4]         /* -> GICC_PMR */
-        /* Reset a few config registers */
-        mov   r0, #0
-        mcr   CP32(r0, FCSEIDR)
-        mcr   CP32(r0, CONTEXTIDR)
-        /* Allow non-secure access to coprocessors, FIQs, VFP and NEON */
-        ldr   r1, =0x3fff            /* 14 CP bits set, all others clear */
-        mcr   CP32(r1, NSACR)
-
-        mrs   r0, cpsr               /* Copy the CPSR */
-        add   r0, r0, #0x4           /* 0x16 (Monitor) -> 0x1a (Hyp) */
-        msr   spsr_cxsf, r0          /* into the SPSR */
-        movs  pc, r3                 /* Exception-return into Hyp mode */
-
-/*
- * Local variables:
- * mode: ASM
- * indent-tabs-mode: nil
- * End:
- */
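kick_cpus wakes the Versatile Express secondaries by publishing the
start address through the V2M FLAGSSET sysreg and then raising a
software-generated interrupt via the distributor's GICD_SGIR register.
Using the word-offset register indices from the new asm/gic.h, the
sequence corresponds roughly to the C below; the base pointers are
illustrative placeholders for the mapped MMIO regions, the V2M_SYS_*
byte offsets come from the VE platform header, and the asm's dsb
barriers are omitted:

    #include <stdint.h>

    static void c_kick_cpus(volatile uint32_t *v2m_sys, /* V2M sysreg base */
                            volatile uint32_t *gicd,    /* GICD base */
                            uint32_t start_paddr)
    {
        v2m_sys[V2M_SYS_FLAGSCLR / 4] = 0xffffffff;  /* clear stale flags */
        v2m_sys[V2M_SYS_FLAGSSET / 4] = start_paddr; /* publish entry point */

        gicd[GICD_CTLR] = 0x1;       /* enable distributor */
        gicd[GICD_SGIR] = 0xfe0000;  /* SGI 0 to CPUs 1-7 */
        gicd[GICD_CTLR] = 0xfe0000;  /* bit 0 clear: disable distributor */
    }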
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/p2m.c
--- a/xen/arch/arm/p2m.c        Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/p2m.c        Wed Dec 19 14:16:30 2012 +0000
@@ -4,7 +4,7 @@
 #include <xen/errno.h>
 #include <xen/domain_page.h>
 #include <asm/flushtlb.h>
-#include "gic.h"
+#include <asm/gic.h>
 
 void dump_p2m_lookup(struct domain *d, paddr_t addr)
 {
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/proc-ca15.S
--- a/xen/arch/arm/proc-ca15.S  Wed Dec 19 14:16:29 2012 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-/*
- * xen/arch/arm/proc-ca15.S
- *
- * Cortex A15 specific initializations
- *
- * Copyright (c) 2011 Citrix Systems.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <asm/asm_defns.h>
-#include <asm/processor-ca15.h>
-
-.globl cortex_a15_init
-cortex_a15_init:
-        /* Set up the SMP bit in ACTLR */
-        mrc   CP32(r0, ACTLR)
-        orr   r0, r0, #(ACTLR_CA15_SMP) /* enable SMP bit */
-        mcr   CP32(r0, ACTLR)
-        mov   pc, lr
-
-/*
- * Local variables:
- * mode: ASM
- * indent-tabs-mode: nil
- * End:
- */
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/setup.c
--- a/xen/arch/arm/setup.c      Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/setup.c      Wed Dec 19 14:16:30 2012 +0000
@@ -39,7 +39,7 @@
 #include <asm/setup.h>
 #include <asm/vfp.h>
 #include <asm/early_printk.h>
-#include "gic.h"
+#include <asm/gic.h>
 
 static __used void init_done(void)
 {
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/smpboot.c
--- a/xen/arch/arm/smpboot.c    Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/smpboot.c    Wed Dec 19 14:16:30 2012 +0000
@@ -29,7 +29,7 @@
 #include <xen/timer.h>
 #include <xen/irq.h>
 #include <asm/vfp.h>
-#include "gic.h"
+#include <asm/gic.h>
 
 cpumask_t cpu_online_map;
 EXPORT_SYMBOL(cpu_online_map);
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/traps.c
--- a/xen/arch/arm/traps.c      Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/traps.c      Wed Dec 19 14:16:30 2012 +0000
@@ -35,7 +35,7 @@
 
 #include "io.h"
 #include "vtimer.h"
-#include "gic.h"
+#include <asm/gic.h>
 
 /* The base of the stack must always be double-word aligned, which means
  * that both the kernel half of struct cpu_user_regs (which is pushed in
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/vgic.c
--- a/xen/arch/arm/vgic.c       Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/vgic.c       Wed Dec 19 14:16:30 2012 +0000
@@ -27,7 +27,7 @@
 #include <asm/current.h>
 
 #include "io.h"
-#include "gic.h"
+#include <asm/gic.h>
 
 #define VGIC_DISTR_BASE_ADDRESS 0x000000002c001000
 
diff -r 984086ca8ca0 -r b094ba4bf985 xen/arch/arm/vtimer.c
--- a/xen/arch/arm/vtimer.c     Wed Dec 19 14:16:29 2012 +0000
+++ b/xen/arch/arm/vtimer.c     Wed Dec 19 14:16:30 2012 +0000
@@ -21,7 +21,7 @@
 #include <xen/lib.h>
 #include <xen/timer.h>
 #include <xen/sched.h>
-#include "gic.h"
+#include <asm/gic.h>
 
 extern s_time_t ticks_to_ns(uint64_t ticks);
 extern uint64_t ns_to_ticks(s_time_t ns);
diff -r 984086ca8ca0 -r b094ba4bf985 xen/include/asm-arm/gic.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-arm/gic.h Wed Dec 19 14:16:30 2012 +0000
@@ -0,0 +1,170 @@
+/*
+ * ARM Generic Interrupt Controller support
+ *
+ * Tim Deegan <tim@xxxxxxx>
+ * Copyright (c) 2011 Citrix Systems.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __ASM_ARM_GIC_H__
+#define __ASM_ARM_GIC_H__
+
+#define GICD_CTLR       (0x000/4)
+#define GICD_TYPER      (0x004/4)
+#define GICD_IIDR       (0x008/4)
+#define GICD_IGROUPR    (0x080/4)
+#define GICD_IGROUPRN   (0x0FC/4)
+#define GICD_ISENABLER  (0x100/4)
+#define GICD_ISENABLERN (0x17C/4)
+#define GICD_ICENABLER  (0x180/4)
+#define GICD_ICENABLERN (0x1fC/4)
+#define GICD_ISPENDR    (0x200/4)
+#define GICD_ISPENDRN   (0x27C/4)
+#define GICD_ICPENDR    (0x280/4)
+#define GICD_ICPENDRN   (0x2FC/4)
+#define GICD_ISACTIVER  (0x300/4)
+#define GICD_ISACTIVERN (0x37C/4)
+#define GICD_ICACTIVER  (0x380/4)
+#define GICD_ICACTIVERN (0x3FC/4)
+#define GICD_IPRIORITYR (0x400/4)
+#define GICD_IPRIORITYRN (0x7F8/4)
+#define GICD_ITARGETSR  (0x800/4)
+#define GICD_ITARGETSRN (0xBF8/4)
+#define GICD_ICFGR      (0xC00/4)
+#define GICD_ICFGRN     (0xCFC/4)
+#define GICD_NSACR      (0xE00/4)
+#define GICD_NSACRN     (0xEFC/4)
+#define GICD_SGIR       (0xF00/4)
+#define GICD_CPENDSGIR  (0xF10/4)
+#define GICD_CPENDSGIRN (0xF1C/4)
+#define GICD_SPENDSGIR  (0xF20/4)
+#define GICD_SPENDSGIRN (0xF2C/4)
+#define GICD_ICPIDR2    (0xFE8/4)
+
+#define GICC_CTLR       (0x0000/4)
+#define GICC_PMR        (0x0004/4)
+#define GICC_BPR        (0x0008/4)
+#define GICC_IAR        (0x000C/4)
+#define GICC_EOIR       (0x0010/4)
+#define GICC_RPR        (0x0014/4)
+#define GICC_HPPIR      (0x0018/4)
+#define GICC_APR        (0x00D0/4)
+#define GICC_NSAPR      (0x00E0/4)
+#define GICC_DIR        (0x1000/4)
+
+#define GICH_HCR        (0x00/4)
+#define GICH_VTR        (0x04/4)
+#define GICH_VMCR       (0x08/4)
+#define GICH_MISR       (0x10/4)
+#define GICH_EISR0      (0x20/4)
+#define GICH_EISR1      (0x24/4)
+#define GICH_ELSR0      (0x30/4)
+#define GICH_ELSR1      (0x34/4)
+#define GICH_APR        (0xF0/4)
+#define GICH_LR         (0x100/4)
+
+/* Register bits */
+#define GICD_CTL_ENABLE 0x1
+
+#define GICD_TYPE_LINES 0x01f
+#define GICD_TYPE_CPUS  0x0e0
+#define GICD_TYPE_SEC   0x400
+
+#define GICC_CTL_ENABLE 0x1
+#define GICC_CTL_EOI    (0x1 << 9)
+
+#define GICC_IA_IRQ     0x03ff
+#define GICC_IA_CPU     0x1c00
+
+#define GICH_HCR_EN       (1 << 0)
+#define GICH_HCR_UIE      (1 << 1)
+#define GICH_HCR_LRENPIE  (1 << 2)
+#define GICH_HCR_NPIE     (1 << 3)
+#define GICH_HCR_VGRP0EIE (1 << 4)
+#define GICH_HCR_VGRP0DIE (1 << 5)
+#define GICH_HCR_VGRP1EIE (1 << 6)
+#define GICH_HCR_VGRP1DIE (1 << 7)
+
+#define GICH_MISR_EOI     (1 << 0)
+#define GICH_MISR_U       (1 << 1)
+#define GICH_MISR_LRENP   (1 << 2)
+#define GICH_MISR_NP      (1 << 3)
+#define GICH_MISR_VGRP0E  (1 << 4)
+#define GICH_MISR_VGRP0D  (1 << 5)
+#define GICH_MISR_VGRP1E  (1 << 6)
+#define GICH_MISR_VGRP1D  (1 << 7)
+
+#define GICH_LR_VIRTUAL_MASK    0x3ff
+#define GICH_LR_VIRTUAL_SHIFT   0
+#define GICH_LR_PHYSICAL_MASK   0x3ff
+#define GICH_LR_PHYSICAL_SHIFT  10
+#define GICH_LR_STATE_MASK      0x3
+#define GICH_LR_STATE_SHIFT     28
+#define GICH_LR_PRIORITY_SHIFT  23
+#define GICH_LR_MAINTENANCE_IRQ (1<<19)
+#define GICH_LR_PENDING         (1<<28)
+#define GICH_LR_ACTIVE          (1<<29)
+#define GICH_LR_GRP1            (1<<30)
+#define GICH_LR_HW              (1<<31)
+#define GICH_LR_CPUID_SHIFT     9
+#define GICH_VTR_NRLRGS         0x3f
+
+/* XXX: write this into the DT */
+#define VGIC_IRQ_EVTCHN_CALLBACK 31
+
+#ifndef __ASSEMBLY__
+extern int domain_vgic_init(struct domain *d);
+extern void domain_vgic_free(struct domain *d);
+
+extern int vcpu_vgic_init(struct vcpu *v);
+
+extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq, int virtual);
+extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
+
+extern void gic_route_ppis(void);
+extern void gic_route_spis(void);
+
+extern void gic_inject(void);
+
+extern void __cpuinit init_maintenance_interrupt(void);
+extern void gic_set_guest_irq(struct vcpu *v, unsigned int irq,
+        unsigned int state, unsigned int priority);
+extern int gic_route_irq_to_guest(struct domain *d, unsigned int irq,
+                                  const char * devname);
+
+/* Accept an interrupt from the GIC and dispatch its handler */
+extern void gic_interrupt(struct cpu_user_regs *regs, int is_fiq);
+/* Bring up the interrupt controller, and report # cpus attached */
+extern void gic_init(void);
+/* Bring up a secondary CPU's per-CPU GIC interface */
+extern void gic_init_secondary_cpu(void);
+/* Take down a CPU's per-CPU GIC interface */
+extern void gic_disable_cpu(void);
+/* setup the gic virtual interface for a guest */
+extern int gicv_setup(struct domain *d);
+
+/* Context switch */
+extern void gic_save_state(struct vcpu *v);
+extern void gic_restore_state(struct vcpu *v);
+
+#endif /* __ASSEMBLY__ */
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
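Note that every register offset in this header is a byte offset divided
by four, so each #define can be used directly as an index into a
volatile uint32_t mapping of the corresponding GIC region rather than as
a byte displacement. For example (the gicd pointer and helper names are
illustrative):

    #include <stdint.h>

    static inline void gicd_enable(volatile uint32_t *gicd)
    {
        gicd[GICD_CTLR] = GICD_CTL_ENABLE;
    }

    static inline uint32_t gicd_nr_lines(volatile uint32_t *gicd)
    {
        /* GICD_TYPER[4:0] holds (supported lines / 32) - 1. */
        return 32 * ((gicd[GICD_TYPER] & GICD_TYPE_LINES) + 1);
    }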
