
[Minios-devel] [UNIKRAFT PATCH 3/9] build: Move arm32 libraries to new family/architecture folder



We use arch/<FAMILY>/ to store the common code and build scripts for
architectures of the same family. The code currently stored in
arch/arm is arm32-specific, so we move it to the new folder
arch/arm/arm32.
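
The resulting layout, per the diffstat below, is:

    arch/arm/Makefile.uk             family-level build script
    arch/arm/arm32/divsi3.S          arm32-specific sources
    arch/arm/arm32/ldivmod.S
    arch/arm/arm32/ldivmod_helper.c
    arch/arm/arm32/qdivrem.c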

Signed-off-by: Wei Chen <Wei.Chen@xxxxxxx>
---
 arch/arm/Makefile.uk            |  15 +-
 arch/arm/arm32/divsi3.S         | 404 ++++++++++++++++++++++++++++++++++++++++
 arch/arm/arm32/ldivmod.S        |  68 +++++++
 arch/arm/arm32/ldivmod_helper.c |  67 +++++++
 arch/arm/arm32/qdivrem.c        | 324 ++++++++++++++++++++++++++++++++
 arch/arm/divsi3.S               | 404 ----------------------------------------
 arch/arm/ldivmod.S              |  68 -------
 arch/arm/ldivmod_helper.c       |  67 -------
 arch/arm/qdivrem.c              | 324 --------------------------------
 9 files changed, 874 insertions(+), 867 deletions(-)
 create mode 100644 arch/arm/arm32/divsi3.S
 create mode 100644 arch/arm/arm32/ldivmod.S
 create mode 100644 arch/arm/arm32/ldivmod_helper.c
 create mode 100644 arch/arm/arm32/qdivrem.c
 delete mode 100644 arch/arm/divsi3.S
 delete mode 100644 arch/arm/ldivmod.S
 delete mode 100644 arch/arm/ldivmod_helper.c
 delete mode 100644 arch/arm/qdivrem.c

diff --git a/arch/arm/Makefile.uk b/arch/arm/Makefile.uk
index 2567dbe..780a035 100644
--- a/arch/arm/Makefile.uk
+++ b/arch/arm/Makefile.uk
@@ -1,3 +1,6 @@
+# Setup compiler flags and objects for arm32
+ifeq ($(UK_ARCH),arm)
+
 ASFLAGS  += -D__ARM_32__
 ASFLAGS  += -marm
 CFLAGS   += -D__ARM_32__
@@ -13,7 +16,11 @@ CFLAGS-$(MARCH_CORTEXA7)   += -mcpu=cortex-a7 -mtune=cortex-a7
 CXXFLAGS-$(MARCH_CORTEXA7) += -mcpu=cortex-a7 -mtune=cortex-a7
 
 $(eval $(call addlib,libarmmath))
-LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/divsi3.S
-LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/ldivmod.S
-LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/ldivmod_helper.c
-LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/qdivrem.c
+LIBARMMATH_SRCS-y += $(UK_BASE)/arch/arm/arm32/divsi3.S
+LIBARMMATH_SRCS-y += $(UK_BASE)/arch/arm/arm32/ldivmod.S
+LIBARMMATH_SRCS-y += $(UK_BASE)/arch/arm/arm32/ldivmod_helper.c
+LIBARMMATH_SRCS-y += $(UK_BASE)/arch/arm/arm32/qdivrem.c
+
+else
+$(error Target architecture ($(UK_ARCH)) is currently not supported.)
+endif
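
As an aside for readers new to the build scripts: the ifeq/else/$(error)
guard added above makes the build fail early and loudly whenever the
selected architecture does not match this family folder. A minimal
standalone sketch of the same pattern (editorial, not part of the patch;
UK_ARCH is assumed to be set by the top-level build):

    # Editorial sketch: the arch guard pattern from Makefile.uk,
    # reduced to a Makefile you can run on its own.
    UK_ARCH ?= arm

    ifeq ($(UK_ARCH),arm)
    ASFLAGS += -D__ARM_32__
    else
    $(error Target architecture ($(UK_ARCH)) is currently not supported.)
    endif

    all:
    	@echo "ASFLAGS = $(ASFLAGS)"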
diff --git a/arch/arm/arm32/divsi3.S b/arch/arm/arm32/divsi3.S
new file mode 100644
index 0000000..8bf5ac2
--- /dev/null
+++ b/arch/arm/arm32/divsi3.S
@@ -0,0 +1,404 @@
+/*     $NetBSD: divsi3.S,v 1.4 2003/04/05 23:27:15 bjh21 Exp $ */
+
+/*-
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#define ENTRY_NP(symbol) \
+       .globl symbol;  \
+       symbol:
+
+#define END(symbol)
+
+/*
+ * stack is aligned as there's a possibility of branching to L_overflow
+ * which makes a C call
+ */
+
+ENTRY_NP(__umodsi3)
+       stmfd   sp!, {lr}
+       sub     sp, sp, #4      /* align stack */
+       bl      .L_udivide
+       add     sp, sp, #4      /* unalign stack */
+       mov     r0, r1
+       ldmfd   sp!, {pc}
+END(__umodsi3)
+
+ENTRY_NP(__modsi3)
+       stmfd   sp!, {lr}
+       sub     sp, sp, #4      /* align stack */
+       bl      .L_divide
+       add     sp, sp, #4      /* unalign stack */
+       mov     r0, r1
+       ldmfd   sp!, {pc}
+
+.L_overflow:
+       /* XXX should cause a fatal error */
+       mvn     r0, #0
+       mov     pc, lr
+
+END(__modsi3)
+
+#ifdef __ARM_EABI__
+ENTRY_NP(__aeabi_uidiv)
+ENTRY_NP(__aeabi_uidivmod)
+#endif
+ENTRY_NP(__udivsi3)
+.L_udivide:                            /* r0 = r0 / r1; r1 = r0 % r1 */
+       eor     r0, r1, r0
+       eor     r1, r0, r1
+       eor     r0, r1, r0
+                                       /* r0 = r1 / r0; r1 = r1 % r0 */
+       cmp     r0, #1
+       bcc     .L_overflow
+       beq     .L_divide_l0
+       mov     ip, #0
+       movs    r1, r1
+       bpl     .L_divide_l1
+       orr     ip, ip, #0x20000000     /* ip bit 0x20000000 = -ve r1 */
+       movs    r1, r1, lsr #1
+       orrcs   ip, ip, #0x10000000     /* ip bit 0x10000000 = bit 0 of r1 */
+       b       .L_divide_l1
+
+.L_divide_l0:                          /* r0 == 1 */
+       mov     r0, r1
+       mov     r1, #0
+       mov     pc, lr
+#ifdef __ARM_EABI__
+END(__aeabi_uidiv)
+END(__aeabi_uidivmod)
+#endif
+END(__udivsi3)
+
+#ifdef __ARM_EABI__
+ENTRY_NP(__aeabi_idiv)
+ENTRY_NP(__aeabi_idivmod)
+#endif
+ENTRY_NP(__divsi3)
+.L_divide:                             /* r0 = r0 / r1; r1 = r0 % r1 */
+       eor     r0, r1, r0
+       eor     r1, r0, r1
+       eor     r0, r1, r0
+                                       /* r0 = r1 / r0; r1 = r1 % r0 */
+       cmp     r0, #1
+       bcc     .L_overflow
+       beq     .L_divide_l0
+       ands    ip, r0, #0x80000000
+       rsbmi   r0, r0, #0
+       ands    r2, r1, #0x80000000
+       eor     ip, ip, r2
+       rsbmi   r1, r1, #0
+       orr     ip, r2, ip, lsr #1      /* ip bit 0x40000000 = -ve division */
+                                       /* ip bit 0x80000000 = -ve remainder */
+
+.L_divide_l1:
+       mov     r2, #1
+       mov     r3, #0
+
+       /*
+        * If the highest bit of the dividend is set, we have to be
+        * careful when shifting the divisor. Test this.
+        */
+       movs    r1,r1
+       bpl     .L_old_code
+
+       /*
+        * At this point, the highest bit of r1 is known to be set.
+        * We abuse this below in the tst instructions.
+        */
+       tst     r1, r0 /*, lsl #0 */
+       bmi     .L_divide_b1
+       tst     r1, r0, lsl #1
+       bmi     .L_divide_b2
+       tst     r1, r0, lsl #2
+       bmi     .L_divide_b3
+       tst     r1, r0, lsl #3
+       bmi     .L_divide_b4
+       tst     r1, r0, lsl #4
+       bmi     .L_divide_b5
+       tst     r1, r0, lsl #5
+       bmi     .L_divide_b6
+       tst     r1, r0, lsl #6
+       bmi     .L_divide_b7
+       tst     r1, r0, lsl #7
+       bmi     .L_divide_b8
+       tst     r1, r0, lsl #8
+       bmi     .L_divide_b9
+       tst     r1, r0, lsl #9
+       bmi     .L_divide_b10
+       tst     r1, r0, lsl #10
+       bmi     .L_divide_b11
+       tst     r1, r0, lsl #11
+       bmi     .L_divide_b12
+       tst     r1, r0, lsl #12
+       bmi     .L_divide_b13
+       tst     r1, r0, lsl #13
+       bmi     .L_divide_b14
+       tst     r1, r0, lsl #14
+       bmi     .L_divide_b15
+       tst     r1, r0, lsl #15
+       bmi     .L_divide_b16
+       tst     r1, r0, lsl #16
+       bmi     .L_divide_b17
+       tst     r1, r0, lsl #17
+       bmi     .L_divide_b18
+       tst     r1, r0, lsl #18
+       bmi     .L_divide_b19
+       tst     r1, r0, lsl #19
+       bmi     .L_divide_b20
+       tst     r1, r0, lsl #20
+       bmi     .L_divide_b21
+       tst     r1, r0, lsl #21
+       bmi     .L_divide_b22
+       tst     r1, r0, lsl #22
+       bmi     .L_divide_b23
+       tst     r1, r0, lsl #23
+       bmi     .L_divide_b24
+       tst     r1, r0, lsl #24
+       bmi     .L_divide_b25
+       tst     r1, r0, lsl #25
+       bmi     .L_divide_b26
+       tst     r1, r0, lsl #26
+       bmi     .L_divide_b27
+       tst     r1, r0, lsl #27
+       bmi     .L_divide_b28
+       tst     r1, r0, lsl #28
+       bmi     .L_divide_b29
+       tst     r1, r0, lsl #29
+       bmi     .L_divide_b30
+       tst     r1, r0, lsl #30
+       bmi     .L_divide_b31
+/*
+ * instead of:
+ *     tst     r1, r0, lsl #31
+ *     bmi     .L_divide_b32
+ */
+       b       .L_divide_b32
+
+.L_old_code:
+       cmp     r1, r0
+       bcc     .L_divide_b0
+       cmp     r1, r0, lsl #1
+       bcc     .L_divide_b1
+       cmp     r1, r0, lsl #2
+       bcc     .L_divide_b2
+       cmp     r1, r0, lsl #3
+       bcc     .L_divide_b3
+       cmp     r1, r0, lsl #4
+       bcc     .L_divide_b4
+       cmp     r1, r0, lsl #5
+       bcc     .L_divide_b5
+       cmp     r1, r0, lsl #6
+       bcc     .L_divide_b6
+       cmp     r1, r0, lsl #7
+       bcc     .L_divide_b7
+       cmp     r1, r0, lsl #8
+       bcc     .L_divide_b8
+       cmp     r1, r0, lsl #9
+       bcc     .L_divide_b9
+       cmp     r1, r0, lsl #10
+       bcc     .L_divide_b10
+       cmp     r1, r0, lsl #11
+       bcc     .L_divide_b11
+       cmp     r1, r0, lsl #12
+       bcc     .L_divide_b12
+       cmp     r1, r0, lsl #13
+       bcc     .L_divide_b13
+       cmp     r1, r0, lsl #14
+       bcc     .L_divide_b14
+       cmp     r1, r0, lsl #15
+       bcc     .L_divide_b15
+       cmp     r1, r0, lsl #16
+       bcc     .L_divide_b16
+       cmp     r1, r0, lsl #17
+       bcc     .L_divide_b17
+       cmp     r1, r0, lsl #18
+       bcc     .L_divide_b18
+       cmp     r1, r0, lsl #19
+       bcc     .L_divide_b19
+       cmp     r1, r0, lsl #20
+       bcc     .L_divide_b20
+       cmp     r1, r0, lsl #21
+       bcc     .L_divide_b21
+       cmp     r1, r0, lsl #22
+       bcc     .L_divide_b22
+       cmp     r1, r0, lsl #23
+       bcc     .L_divide_b23
+       cmp     r1, r0, lsl #24
+       bcc     .L_divide_b24
+       cmp     r1, r0, lsl #25
+       bcc     .L_divide_b25
+       cmp     r1, r0, lsl #26
+       bcc     .L_divide_b26
+       cmp     r1, r0, lsl #27
+       bcc     .L_divide_b27
+       cmp     r1, r0, lsl #28
+       bcc     .L_divide_b28
+       cmp     r1, r0, lsl #29
+       bcc     .L_divide_b29
+       cmp     r1, r0, lsl #30
+       bcc     .L_divide_b30
+.L_divide_b32:
+       cmp     r1, r0, lsl #31
+       subhs   r1, r1,r0, lsl #31
+       addhs   r3, r3,r2, lsl #31
+.L_divide_b31:
+       cmp     r1, r0, lsl #30
+       subhs   r1, r1,r0, lsl #30
+       addhs   r3, r3,r2, lsl #30
+.L_divide_b30:
+       cmp     r1, r0, lsl #29
+       subhs   r1, r1,r0, lsl #29
+       addhs   r3, r3,r2, lsl #29
+.L_divide_b29:
+       cmp     r1, r0, lsl #28
+       subhs   r1, r1,r0, lsl #28
+       addhs   r3, r3,r2, lsl #28
+.L_divide_b28:
+       cmp     r1, r0, lsl #27
+       subhs   r1, r1,r0, lsl #27
+       addhs   r3, r3,r2, lsl #27
+.L_divide_b27:
+       cmp     r1, r0, lsl #26
+       subhs   r1, r1,r0, lsl #26
+       addhs   r3, r3,r2, lsl #26
+.L_divide_b26:
+       cmp     r1, r0, lsl #25
+       subhs   r1, r1,r0, lsl #25
+       addhs   r3, r3,r2, lsl #25
+.L_divide_b25:
+       cmp     r1, r0, lsl #24
+       subhs   r1, r1,r0, lsl #24
+       addhs   r3, r3,r2, lsl #24
+.L_divide_b24:
+       cmp     r1, r0, lsl #23
+       subhs   r1, r1,r0, lsl #23
+       addhs   r3, r3,r2, lsl #23
+.L_divide_b23:
+       cmp     r1, r0, lsl #22
+       subhs   r1, r1,r0, lsl #22
+       addhs   r3, r3,r2, lsl #22
+.L_divide_b22:
+       cmp     r1, r0, lsl #21
+       subhs   r1, r1,r0, lsl #21
+       addhs   r3, r3,r2, lsl #21
+.L_divide_b21:
+       cmp     r1, r0, lsl #20
+       subhs   r1, r1,r0, lsl #20
+       addhs   r3, r3,r2, lsl #20
+.L_divide_b20:
+       cmp     r1, r0, lsl #19
+       subhs   r1, r1,r0, lsl #19
+       addhs   r3, r3,r2, lsl #19
+.L_divide_b19:
+       cmp     r1, r0, lsl #18
+       subhs   r1, r1,r0, lsl #18
+       addhs   r3, r3,r2, lsl #18
+.L_divide_b18:
+       cmp     r1, r0, lsl #17
+       subhs   r1, r1,r0, lsl #17
+       addhs   r3, r3,r2, lsl #17
+.L_divide_b17:
+       cmp     r1, r0, lsl #16
+       subhs   r1, r1,r0, lsl #16
+       addhs   r3, r3,r2, lsl #16
+.L_divide_b16:
+       cmp     r1, r0, lsl #15
+       subhs   r1, r1,r0, lsl #15
+       addhs   r3, r3,r2, lsl #15
+.L_divide_b15:
+       cmp     r1, r0, lsl #14
+       subhs   r1, r1,r0, lsl #14
+       addhs   r3, r3,r2, lsl #14
+.L_divide_b14:
+       cmp     r1, r0, lsl #13
+       subhs   r1, r1,r0, lsl #13
+       addhs   r3, r3,r2, lsl #13
+.L_divide_b13:
+       cmp     r1, r0, lsl #12
+       subhs   r1, r1,r0, lsl #12
+       addhs   r3, r3,r2, lsl #12
+.L_divide_b12:
+       cmp     r1, r0, lsl #11
+       subhs   r1, r1,r0, lsl #11
+       addhs   r3, r3,r2, lsl #11
+.L_divide_b11:
+       cmp     r1, r0, lsl #10
+       subhs   r1, r1,r0, lsl #10
+       addhs   r3, r3,r2, lsl #10
+.L_divide_b10:
+       cmp     r1, r0, lsl #9
+       subhs   r1, r1,r0, lsl #9
+       addhs   r3, r3,r2, lsl #9
+.L_divide_b9:
+       cmp     r1, r0, lsl #8
+       subhs   r1, r1,r0, lsl #8
+       addhs   r3, r3,r2, lsl #8
+.L_divide_b8:
+       cmp     r1, r0, lsl #7
+       subhs   r1, r1,r0, lsl #7
+       addhs   r3, r3,r2, lsl #7
+.L_divide_b7:
+       cmp     r1, r0, lsl #6
+       subhs   r1, r1,r0, lsl #6
+       addhs   r3, r3,r2, lsl #6
+.L_divide_b6:
+       cmp     r1, r0, lsl #5
+       subhs   r1, r1,r0, lsl #5
+       addhs   r3, r3,r2, lsl #5
+.L_divide_b5:
+       cmp     r1, r0, lsl #4
+       subhs   r1, r1,r0, lsl #4
+       addhs   r3, r3,r2, lsl #4
+.L_divide_b4:
+       cmp     r1, r0, lsl #3
+       subhs   r1, r1,r0, lsl #3
+       addhs   r3, r3,r2, lsl #3
+.L_divide_b3:
+       cmp     r1, r0, lsl #2
+       subhs   r1, r1,r0, lsl #2
+       addhs   r3, r3,r2, lsl #2
+.L_divide_b2:
+       cmp     r1, r0, lsl #1
+       subhs   r1, r1,r0, lsl #1
+       addhs   r3, r3,r2, lsl #1
+.L_divide_b1:
+       cmp     r1, r0
+       subhs   r1, r1, r0
+       addhs   r3, r3, r2
+.L_divide_b0:
+
+       tst     ip, #0x20000000
+       bne     .L_udivide_l1
+       mov     r0, r3
+       cmp     ip, #0
+       rsbmi   r1, r1, #0
+       movs    ip, ip, lsl #1
+       bicmi   r0, r0, #0x80000000     /* Fix in case we divided 0x80000000 */
+       rsbmi   r0, r0, #0
+       mov     pc, lr
+
+.L_udivide_l1:
+       tst     ip, #0x10000000
+       mov     r1, r1, lsl #1
+       orrne   r1, r1, #1
+       mov     r3, r3, lsl #1
+       cmp     r1, r0
+       subhs   r1, r1, r0
+       addhs   r3, r3, r2
+       mov     r0, r3
+       mov     pc, lr
+END(__aeabi_idiv)
+END(__aeabi_idivmod)
+END(__divsi3)
+
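
As an editorial aside (not part of the patch): the long tst/cmp ladders
above are an unrolled binary long division. A minimal C sketch of the
same shift-and-subtract idea, for readers tracing the assembly; the
function name is made up for illustration:

    /* Editorial sketch of the shift-and-subtract division that
     * divsi3.S unrolls bit by bit. For d == 0 this happens to return
     * 0xffffffff, matching the "mvn r0, #0" in .L_overflow above. */
    unsigned int udiv_sketch(unsigned int n, unsigned int d,
                             unsigned int *rem)
    {
            unsigned int q = 0;
            int b;

            for (b = 31; b >= 0; b--) {
                    /* take quotient bit b if (d << b) lost no bits
                     * and still fits into what is left of n */
                    if (((d << b) >> b) == d && (d << b) <= n) {
                            n -= d << b;
                            q |= 1u << b;
                    }
            }
            *rem = n;
            return q;
    }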
diff --git a/arch/arm/arm32/ldivmod.S b/arch/arm/arm32/ldivmod.S
new file mode 100644
index 0000000..180227c
--- /dev/null
+++ b/arch/arm/arm32/ldivmod.S
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * Copyright (C) 2012 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#define ENTRY_NP(symbol) \
+       .globl symbol;  \
+       symbol:
+
+#define END(symbol)
+
+/*
+ * These calculate:
+ * q = n / m
+ * with a remainder r.
+ *
+ * They take n in {r0, r1} and m in {r2, r3} then pass them into the
+ * helper function. The helper functions return q in {r0, r1} as
+ * required by the API spec; however, r is returned on the stack. The
+ * ABI requires us to return r in {r2, r3}.
+ *
+ * We need to allocate 8 bytes on the stack to store r, the link
+ * register, and a pointer to the space where the helper function
+ * will write r to. After returning from the helper function we load
+ * the old link register and r from the stack and return.
+ */
+ENTRY_NP(__aeabi_ldivmod)
+       sub     sp, sp, #8      /* Space for the remainder */
+       stmfd   sp!, {sp, lr}   /* Save a pointer to the above space and lr */
+       bl      __kern_ldivmod
+       ldr     lr, [sp, #4]    /* Restore lr */
+       add     sp, sp, #8      /* Move sp to the remainder value */
+       ldmfd   sp!, {r2, r3}   /* Load the remainder */
+       mov     pc, lr
+END(__aeabi_ldivmod)
+
+ENTRY_NP(__aeabi_uldivmod)
+       sub     sp, sp, #8      /* Space for the remainder */
+       stmfd   sp!, {sp, lr}   /* Save a pointer to the above space and lr */
+       bl      __qdivrem
+       ldr     lr, [sp, #4]    /* Restore lr */
+       add     sp, sp, #8      /* Move sp to the remainder value */
+       ldmfd   sp!, {r2, r3}   /* Load the remainder */
+       mov     pc, lr
+END(__aeabi_uldivmod)
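
Editorial illustration (not part of the patch): immediately after the
stmfd in either entry point, the frame that the ldr/add/ldmfd sequence
unwinds looks like this:

    sp + 8 .. sp + 15 : 8-byte slot for the remainder r
    sp + 4            : saved lr
    sp + 0            : saved (old) sp, pointing at the remainder slot;
                        the helper picks it up as its stack-passed
                        remainder-pointer argument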
diff --git a/arch/arm/arm32/ldivmod_helper.c b/arch/arm/arm32/ldivmod_helper.c
new file mode 100644
index 0000000..098523e
--- /dev/null
+++ b/arch/arm/arm32/ldivmod_helper.c
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * Copyright (C) 2012 Andrew Turner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <uk/arch/types.h>
+
+__u64 __qdivrem(__u64 u, __u64 v, __u64 *rem);
+
+#ifndef HAVE_LIBC
+__s64 __divdi3(__s64 a, __s64 b)
+{
+       __u64 ua, ub, uq;
+       int neg;
+
+       if (a < 0)
+               ua = -(__u64)a, neg = 1;
+       else
+               ua = a, neg = 0;
+       if (b < 0)
+               ub = -(__u64)b, neg ^= 1;
+       else
+               ub = b;
+       uq = __qdivrem(ua, ub, (__u64 *)0);
+       return neg ? -uq : uq;
+}
+#endif
+
+/*
+ * Helper for __aeabi_ldivmod.
+ * TODO: __divdi3 calls __qdivrem. We should do the same and use the
+ * remainder value rather than re-calculating it.
+ */
+long long __kern_ldivmod(long long, long long, long long *);
+
+long long __kern_ldivmod(long long n, long long m, long long *rem)
+{
+       long long q;
+
+       q = __divdi3(n, m); /* q = n / m */
+       *rem = n - m * q;
+
+       return q;
+}
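
Editorial sketch of the TODO above (not part of the patch): the helper
could let __qdivrem hand back the remainder directly instead of
recomputing it with a multiply. Sign handling mirrors __divdi3; the
function name is hypothetical:

    /* Hypothetical rework per the TODO: reuse __qdivrem's remainder.
     * In C, the remainder takes the sign of the dividend n. */
    long long __kern_ldivmod_sketch(long long n, long long m,
                                    long long *rem)
    {
            __u64 ua, ub, uq, ur;
            int neg;

            if (n < 0)
                    ua = -(__u64)n, neg = 1;
            else
                    ua = n, neg = 0;
            if (m < 0)
                    ub = -(__u64)m, neg ^= 1;
            else
                    ub = m;
            uq = __qdivrem(ua, ub, &ur);
            *rem = (n < 0) ? -(long long)ur : (long long)ur;
            return neg ? -(long long)uq : (long long)uq;
    }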
diff --git a/arch/arm/arm32/qdivrem.c b/arch/arm/arm32/qdivrem.c
new file mode 100644
index 0000000..e7d1471
--- /dev/null
+++ b/arch/arm/arm32/qdivrem.c
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*-
+ * Copyright (c) 1992, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <uk/arch/types.h>
+#include <uk/arch/limits.h>
+
+/*
+ * Multiprecision divide.  This algorithm is from Knuth vol. 2 (2nd ed),
+ * section 4.3.1, pp. 257--259.
+ */
+
+/*
+ * From
+ *     @(#)quad.h      8.1 (Berkeley) 6/4/93
+ */
+
+#ifdef __BIG_ENDIAN
+#define _QUAD_HIGHWORD 0
+#define _QUAD_LOWWORD 1
+#else /* __LITTLE_ENDIAN */
+#define _QUAD_HIGHWORD 1
+#define _QUAD_LOWWORD 0
+#endif
+
+/*
+ * Define high and low longwords.
+ */
+#define QUADH               _QUAD_HIGHWORD
+#define QUADL               _QUAD_LOWWORD
+
+/*
+ * Total number of bits in a quad_t and in the pieces that make it up.
+ * These are used for shifting, and also below for halfword extraction
+ * and assembly.
+ */
+#define CHAR_BIT        8               /* number of bits in a char */
+#define QUAD_BITS       (sizeof(__s64) * CHAR_BIT)
+#define LONG_BITS       (sizeof(long) * CHAR_BIT)
+#define HALF_BITS       (sizeof(long) * CHAR_BIT / 2)
+
+#define DIGIT_BASE (1 << HALF_BITS) /* digit base */
+/*
+ * Extract high and low shortwords from longword, and move low shortword of
+ * longword to upper half of long, i.e., produce the upper longword of
+ * ((quad_t)(x) << (number_of_bits_in_long/2)).  (`x' must actually be u_long.)
+ *
+ * These are used in the multiply code, to split a longword into upper
+ * and lower halves, and to reassemble a product as a quad_t, shifted left
+ * (sizeof(long)*CHAR_BIT/2).
+ */
+#define HHALF(x)        ((x) >> HALF_BITS)
+#define LHALF(x)        ((x) & ((1 << HALF_BITS) - 1))
+#define LHUP(x)         ((x) << HALF_BITS)
+
+#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b))
+
+/*
+ * Depending on the desired operation, we view a `long long' (aka quad_t) in
+ * one or more of the following formats.
+ */
+union uu {
+       __s64           q;             /* as a (signed) quad */
+       __s64          uq;             /* as an unsigned quad */
+       long           sl[2];          /* as two signed longs */
+       unsigned long  ul[2];          /* as two unsigned longs */
+};
+
+#define        B       (1 << HALF_BITS)        /* digit base */
+
+/* select a type for digits in base B: use unsigned short if they fit */
+#if __UL_MAX == 0xffffffff && __US_MAX >= 0xffff
+typedef unsigned short digit;
+#else
+typedef unsigned long digit;
+#endif
+
+/*
+ * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
+ * `fall out' the left (there never will be any such anyway).
+ * We may assume len >= 0.  NOTE THAT THIS WRITES len+1 DIGITS.
+ */
+static void __shl(register digit *p, register int len, register int sh)
+{
+       register int i;
+
+       for (i = 0; i < len; i++)
+               p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
+       p[i] = LHALF(p[i] << sh);
+}
+
+/*
+ * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
+ *
+ * We do this in base 2-sup-HALF_BITS, so that all intermediate products
+ * fit within u_long.  As a consequence, the maximum length dividend and
+ * divisor are 4 `digits' in this base (they are shorter if they have
+ * leading zeros).
+ */
+__u64 __qdivrem(__u64 uq, __u64 vq, __u64 *arq)
+{
+       union uu tmp;
+       digit *u, *v, *q;
+       register digit v1, v2;
+       unsigned long qhat, rhat, t;
+       int m, n, d, j, i;
+       digit uspace[5], vspace[5], qspace[5];
+
+       /*
+        * Take care of special cases: divide by zero, and u < v.
+        */
+       if (vq == 0) {
+               /* divide by zero. */
+               static volatile const unsigned int zero = 0;
+
+               tmp.ul[QUADH] = tmp.ul[QUADL] = 1 / zero;
+               if (arq)
+                       *arq = uq;
+               return tmp.q;
+       }
+       if (uq < vq) {
+               if (arq)
+                       *arq = uq;
+               return 0;
+       }
+       u = &uspace[0];
+       v = &vspace[0];
+       q = &qspace[0];
+
+       /*
+        * Break dividend and divisor into digits in base B, then
+        * count leading zeros to determine m and n.  When done, we
+        * will have:
+        *      u = (u[1]u[2]...u[m+n]) sub B
+        *      v = (v[1]v[2]...v[n]) sub B
+        *      v[1] != 0
+        *      1 < n <= 4 (if n = 1, we use a different division algorithm)
+        *      m >= 0 (otherwise u < v, which we already checked)
+        *      m + n = 4
+        * and thus
+        *      m = 4 - n <= 2
+        */
+       tmp.uq = uq;
+       u[0] = 0;
+       u[1] = HHALF(tmp.ul[QUADH]);
+       u[2] = LHALF(tmp.ul[QUADH]);
+       u[3] = HHALF(tmp.ul[QUADL]);
+       u[4] = LHALF(tmp.ul[QUADL]);
+       tmp.uq = vq;
+       v[1] = HHALF(tmp.ul[QUADH]);
+       v[2] = LHALF(tmp.ul[QUADH]);
+       v[3] = HHALF(tmp.ul[QUADL]);
+       v[4] = LHALF(tmp.ul[QUADL]);
+       for (n = 4; v[1] == 0; v++) {
+               if (--n == 1) {
+                       unsigned long rbj; /* r*B+u[j] (not root boy jim) */
+                       digit q1, q2, q3, q4;
+
+                       /*
+                        * Change of plan, per exercise 16.
+                        *      r = 0;
+                        *      for j = 1..4:
+                        *              q[j] = floor((r*B + u[j]) / v),
+                        *              r = (r*B + u[j]) % v;
+                        * We unroll this completely here.
+                        */
+                       t = v[2];       /* nonzero, by definition */
+                       q1 = u[1] / t;
+                       rbj = COMBINE(u[1] % t, u[2]);
+                       q2 = rbj / t;
+                       rbj = COMBINE(rbj % t, u[3]);
+                       q3 = rbj / t;
+                       rbj = COMBINE(rbj % t, u[4]);
+                       q4 = rbj / t;
+                       if (arq)
+                               *arq = rbj % t;
+                       tmp.ul[QUADH] = COMBINE(q1, q2);
+                       tmp.ul[QUADL] = COMBINE(q3, q4);
+                       return tmp.q;
+               }
+       }
+
+       /*
+        * By adjusting q once we determine m, we can guarantee that
+        * there is a complete four-digit quotient at &qspace[1] when
+        * we finally stop.
+        */
+       for (m = 4 - n; u[1] == 0; u++)
+               m--;
+       for (i = 4 - m; --i >= 0;)
+               q[i] = 0;
+       q += 4 - m;
+
+       /*
+        * Here we run Program D, translated from MIX to C and acquiring
+        * a few minor changes.
+        *
+        * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
+        */
+       d = 0;
+       for (t = v[1]; t < B / 2; t <<= 1)
+               d++;
+       if (d > 0) {
+               __shl(&u[0], m + n, d);         /* u <<= d */
+               __shl(&v[1], n - 1, d);         /* v <<= d */
+       }
+       /*
+        * D2: j = 0.
+        */
+       j = 0;
+       v1 = v[1];      /* for D3 -- note that v[1..n] are constant */
+       v2 = v[2];      /* for D3 */
+       do {
+               register digit uj0, uj1, uj2;
+
+               /*
+                * D3: Calculate qhat (\^q, in TeX notation).
+                * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
+                * let rhat = (u[j]*B + u[j+1]) mod v[1].
+                * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
+                * decrement qhat and increase rhat correspondingly.
+                * Note that if rhat >= B, v[2]*qhat < rhat*B.
+                */
+               uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
+               uj1 = u[j + 1]; /* for D3 only */
+               uj2 = u[j + 2]; /* for D3 only */
+               if (uj0 == v1) {
+                       qhat = B;
+                       rhat = uj1;
+                       goto qhat_too_big;
+               } else {
+                       unsigned long nn = COMBINE(uj0, uj1);
+
+                       qhat = nn / v1;
+                       rhat = nn % v1;
+               }
+               while (v2 * qhat > COMBINE(rhat, uj2)) {
+qhat_too_big:
+                       qhat--;
+                       if ((rhat += v1) >= B)
+                               break;
+               }
+               /*
+                * D4: Multiply and subtract.
+                * The variable `t' holds any borrows across the loop.
+                * We split this up so that we do not require v[0] = 0,
+                * and to eliminate a final special case.
+                */
+               for (t = 0, i = n; i > 0; i--) {
+                       t = u[i + j] - v[i] * qhat - t;
+                       u[i + j] = LHALF(t);
+                       t = (B - HHALF(t)) & (B - 1);
+               }
+               t = u[j] - t;
+               u[j] = LHALF(t);
+               /*
+                * D5: test remainder.
+                * There is a borrow if and only if HHALF(t) is nonzero;
+                * in that (rare) case, qhat was too large (by exactly 1).
+                * Fix it by adding v[1..n] to u[j..j+n].
+                */
+               if (HHALF(t)) {
+                       qhat--;
+                       for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
+                               t += u[i + j] + v[i];
+                               u[i + j] = LHALF(t);
+                               t = HHALF(t);
+                       }
+                       u[j] = LHALF(u[j] + t);
+               }
+               q[j] = qhat;
+       } while (++j <= m);             /* D7: loop on j. */
+
+       /*
+        * If caller wants the remainder, we have to calculate it as
+        * u[m..m+n] >> d (this is at most n digits and thus fits in
+        * u[m+1..m+n], but we may need more source digits).
+        */
+       if (arq) {
+               if (d) {
+                       for (i = m + n; i > m; --i)
+                               u[i] = (u[i] >> d) |
+                                   LHALF(u[i - 1] << (HALF_BITS - d));
+                       u[i] = 0;
+               }
+               tmp.ul[QUADH] = COMBINE(uspace[1], uspace[2]);
+               tmp.ul[QUADL] = COMBINE(uspace[3], uspace[4]);
+               *arq = tmp.q;
+       }
+
+       tmp.ul[QUADH] = COMBINE(qspace[1], qspace[2]);
+       tmp.ul[QUADL] = COMBINE(qspace[3], qspace[4]);
+       return tmp.q;
+}
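
Editorial worked example (not part of the patch): on arm32, long is 32
bits, so HALF_BITS is 16 and the digit base B is 0x10000. A standalone
C snippet showing the digit split and COMBINE round-trip that
__qdivrem relies on:

    /* Standalone demo of the base-2^16 digit decomposition. */
    #include <stdio.h>

    #define HALF_BITS 16
    #define HHALF(x)  ((x) >> HALF_BITS)
    #define LHALF(x)  ((x) & ((1UL << HALF_BITS) - 1))
    #define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b))

    int main(void)
    {
            unsigned long hi = 0x00012345UL, lo = 0x6789abcdUL;
            /* u[1..4]: the four base-B digits of the value hi:lo */
            unsigned long u1 = HHALF(hi), u2 = LHALF(hi);
            unsigned long u3 = HHALF(lo), u4 = LHALF(lo);

            printf("digits: %04lx %04lx %04lx %04lx\n", u1, u2, u3, u4);
            /* prints: digits: 0001 2345 6789 abcd */
            printf("recombined: %08lx %08lx\n",
                   COMBINE(u1, u2), COMBINE(u3, u4));
            /* prints: recombined: 00012345 6789abcd */
            return 0;
    }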
diff --git a/arch/arm/divsi3.S b/arch/arm/divsi3.S
deleted file mode 100644
index fa92233..0000000
--- a/arch/arm/divsi3.S
+++ /dev/null
@@ -1,404 +0,0 @@
-/*     $NetBSD: divsi3.S,v 1.4 2003/04/05 23:27:15 bjh21 Exp $ */
-
-/*-
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#define ENTRY_NP(symbol) \
-       .globl symbol;  \
-       symbol:
-
-#define END(symbol)
-
-/* 
- * stack is aligned as there's a possibility of branching to L_overflow
- * which makes a C call
- */
-
-ENTRY_NP(__umodsi3)
-       stmfd   sp!, {lr}
-       sub     sp, sp, #4      /* align stack */
-       bl      .L_udivide
-       add     sp, sp, #4      /* unalign stack */
-       mov     r0, r1
-       ldmfd   sp!, {pc}
-END(__umodsi3)
-
-ENTRY_NP(__modsi3)
-       stmfd   sp!, {lr}
-       sub     sp, sp, #4      /* align stack */
-       bl      .L_divide
-       add     sp, sp, #4      /* unalign stack */
-       mov     r0, r1
-       ldmfd   sp!, {pc}
-
-.L_overflow:
-       /* XXX should cause a fatal error */
-       mvn     r0, #0
-       mov     pc, lr
-
-END(__modsi3)
-
-#ifdef __ARM_EABI__
-ENTRY_NP(__aeabi_uidiv)
-ENTRY_NP(__aeabi_uidivmod)
-#endif
-ENTRY_NP(__udivsi3)
-.L_udivide:                            /* r0 = r0 / r1; r1 = r0 % r1 */
-       eor     r0, r1, r0 
-       eor     r1, r0, r1 
-       eor     r0, r1, r0 
-                                       /* r0 = r1 / r0; r1 = r1 % r0 */
-       cmp     r0, #1
-       bcc     .L_overflow
-       beq     .L_divide_l0
-       mov     ip, #0
-       movs    r1, r1
-       bpl     .L_divide_l1
-       orr     ip, ip, #0x20000000     /* ip bit 0x20000000 = -ve r1 */
-       movs    r1, r1, lsr #1
-       orrcs   ip, ip, #0x10000000     /* ip bit 0x10000000 = bit 0 of r1 */
-       b       .L_divide_l1
-
-.L_divide_l0:                          /* r0 == 1 */
-       mov     r0, r1
-       mov     r1, #0
-       mov     pc, lr
-#ifdef __ARM_EABI__
-END(__aeabi_uidiv)
-END(__aeabi_uidivmod)
-#endif
-END(__udivsi3)
-
-#ifdef __ARM_EABI__
-ENTRY_NP(__aeabi_idiv)
-ENTRY_NP(__aeabi_idivmod)
-#endif
-ENTRY_NP(__divsi3)
-.L_divide:                             /* r0 = r0 / r1; r1 = r0 % r1 */
-       eor     r0, r1, r0 
-       eor     r1, r0, r1 
-       eor     r0, r1, r0 
-                                       /* r0 = r1 / r0; r1 = r1 % r0 */
-       cmp     r0, #1
-       bcc     .L_overflow
-       beq     .L_divide_l0
-       ands    ip, r0, #0x80000000
-       rsbmi   r0, r0, #0
-       ands    r2, r1, #0x80000000
-       eor     ip, ip, r2
-       rsbmi   r1, r1, #0
-       orr     ip, r2, ip, lsr #1      /* ip bit 0x40000000 = -ve division */
-                                       /* ip bit 0x80000000 = -ve remainder */
-
-.L_divide_l1:
-       mov     r2, #1
-       mov     r3, #0
-
-       /*
-        * If the highest bit of the dividend is set, we have to be
-        * careful when shifting the divisor. Test this. 
-        */
-       movs    r1,r1
-       bpl     .L_old_code
-
-       /*
-        * At this point, the highest bit of r1 is known to be set.
-        * We abuse this below in the tst instructions.
-        */
-       tst     r1, r0 /*, lsl #0 */
-       bmi     .L_divide_b1
-       tst     r1, r0, lsl #1
-       bmi     .L_divide_b2
-       tst     r1, r0, lsl #2
-       bmi     .L_divide_b3
-       tst     r1, r0, lsl #3
-       bmi     .L_divide_b4
-       tst     r1, r0, lsl #4
-       bmi     .L_divide_b5
-       tst     r1, r0, lsl #5
-       bmi     .L_divide_b6
-       tst     r1, r0, lsl #6
-       bmi     .L_divide_b7
-       tst     r1, r0, lsl #7
-       bmi     .L_divide_b8
-       tst     r1, r0, lsl #8
-       bmi     .L_divide_b9
-       tst     r1, r0, lsl #9
-       bmi     .L_divide_b10
-       tst     r1, r0, lsl #10
-       bmi     .L_divide_b11
-       tst     r1, r0, lsl #11
-       bmi     .L_divide_b12
-       tst     r1, r0, lsl #12
-       bmi     .L_divide_b13
-       tst     r1, r0, lsl #13
-       bmi     .L_divide_b14
-       tst     r1, r0, lsl #14
-       bmi     .L_divide_b15
-       tst     r1, r0, lsl #15
-       bmi     .L_divide_b16
-       tst     r1, r0, lsl #16
-       bmi     .L_divide_b17
-       tst     r1, r0, lsl #17
-       bmi     .L_divide_b18
-       tst     r1, r0, lsl #18
-       bmi     .L_divide_b19
-       tst     r1, r0, lsl #19
-       bmi     .L_divide_b20
-       tst     r1, r0, lsl #20
-       bmi     .L_divide_b21
-       tst     r1, r0, lsl #21
-       bmi     .L_divide_b22
-       tst     r1, r0, lsl #22
-       bmi     .L_divide_b23
-       tst     r1, r0, lsl #23
-       bmi     .L_divide_b24
-       tst     r1, r0, lsl #24
-       bmi     .L_divide_b25
-       tst     r1, r0, lsl #25
-       bmi     .L_divide_b26
-       tst     r1, r0, lsl #26
-       bmi     .L_divide_b27
-       tst     r1, r0, lsl #27
-       bmi     .L_divide_b28
-       tst     r1, r0, lsl #28
-       bmi     .L_divide_b29
-       tst     r1, r0, lsl #29
-       bmi     .L_divide_b30
-       tst     r1, r0, lsl #30
-       bmi     .L_divide_b31
-/*
- * instead of:
- *     tst     r1, r0, lsl #31
- *     bmi     .L_divide_b32
- */
-       b       .L_divide_b32
-
-.L_old_code:
-       cmp     r1, r0
-       bcc     .L_divide_b0
-       cmp     r1, r0, lsl #1
-       bcc     .L_divide_b1
-       cmp     r1, r0, lsl #2
-       bcc     .L_divide_b2
-       cmp     r1, r0, lsl #3
-       bcc     .L_divide_b3
-       cmp     r1, r0, lsl #4
-       bcc     .L_divide_b4
-       cmp     r1, r0, lsl #5
-       bcc     .L_divide_b5
-       cmp     r1, r0, lsl #6
-       bcc     .L_divide_b6
-       cmp     r1, r0, lsl #7
-       bcc     .L_divide_b7
-       cmp     r1, r0, lsl #8
-       bcc     .L_divide_b8
-       cmp     r1, r0, lsl #9
-       bcc     .L_divide_b9
-       cmp     r1, r0, lsl #10
-       bcc     .L_divide_b10
-       cmp     r1, r0, lsl #11
-       bcc     .L_divide_b11
-       cmp     r1, r0, lsl #12
-       bcc     .L_divide_b12
-       cmp     r1, r0, lsl #13
-       bcc     .L_divide_b13
-       cmp     r1, r0, lsl #14
-       bcc     .L_divide_b14
-       cmp     r1, r0, lsl #15
-       bcc     .L_divide_b15
-       cmp     r1, r0, lsl #16
-       bcc     .L_divide_b16
-       cmp     r1, r0, lsl #17
-       bcc     .L_divide_b17
-       cmp     r1, r0, lsl #18
-       bcc     .L_divide_b18
-       cmp     r1, r0, lsl #19
-       bcc     .L_divide_b19
-       cmp     r1, r0, lsl #20
-       bcc     .L_divide_b20
-       cmp     r1, r0, lsl #21
-       bcc     .L_divide_b21
-       cmp     r1, r0, lsl #22
-       bcc     .L_divide_b22
-       cmp     r1, r0, lsl #23
-       bcc     .L_divide_b23
-       cmp     r1, r0, lsl #24
-       bcc     .L_divide_b24
-       cmp     r1, r0, lsl #25
-       bcc     .L_divide_b25
-       cmp     r1, r0, lsl #26
-       bcc     .L_divide_b26
-       cmp     r1, r0, lsl #27
-       bcc     .L_divide_b27
-       cmp     r1, r0, lsl #28
-       bcc     .L_divide_b28
-       cmp     r1, r0, lsl #29
-       bcc     .L_divide_b29
-       cmp     r1, r0, lsl #30
-       bcc     .L_divide_b30
-.L_divide_b32:
-       cmp     r1, r0, lsl #31
-       subhs   r1, r1,r0, lsl #31
-       addhs   r3, r3,r2, lsl #31
-.L_divide_b31:
-       cmp     r1, r0, lsl #30
-       subhs   r1, r1,r0, lsl #30
-       addhs   r3, r3,r2, lsl #30
-.L_divide_b30:
-       cmp     r1, r0, lsl #29
-       subhs   r1, r1,r0, lsl #29
-       addhs   r3, r3,r2, lsl #29
-.L_divide_b29:
-       cmp     r1, r0, lsl #28
-       subhs   r1, r1,r0, lsl #28
-       addhs   r3, r3,r2, lsl #28
-.L_divide_b28:
-       cmp     r1, r0, lsl #27
-       subhs   r1, r1,r0, lsl #27
-       addhs   r3, r3,r2, lsl #27
-.L_divide_b27:
-       cmp     r1, r0, lsl #26
-       subhs   r1, r1,r0, lsl #26
-       addhs   r3, r3,r2, lsl #26
-.L_divide_b26:
-       cmp     r1, r0, lsl #25
-       subhs   r1, r1,r0, lsl #25
-       addhs   r3, r3,r2, lsl #25
-.L_divide_b25:
-       cmp     r1, r0, lsl #24
-       subhs   r1, r1,r0, lsl #24
-       addhs   r3, r3,r2, lsl #24
-.L_divide_b24:
-       cmp     r1, r0, lsl #23
-       subhs   r1, r1,r0, lsl #23
-       addhs   r3, r3,r2, lsl #23
-.L_divide_b23:
-       cmp     r1, r0, lsl #22
-       subhs   r1, r1,r0, lsl #22
-       addhs   r3, r3,r2, lsl #22
-.L_divide_b22:
-       cmp     r1, r0, lsl #21
-       subhs   r1, r1,r0, lsl #21
-       addhs   r3, r3,r2, lsl #21
-.L_divide_b21:
-       cmp     r1, r0, lsl #20
-       subhs   r1, r1,r0, lsl #20
-       addhs   r3, r3,r2, lsl #20
-.L_divide_b20:
-       cmp     r1, r0, lsl #19
-       subhs   r1, r1,r0, lsl #19
-       addhs   r3, r3,r2, lsl #19
-.L_divide_b19:
-       cmp     r1, r0, lsl #18
-       subhs   r1, r1,r0, lsl #18
-       addhs   r3, r3,r2, lsl #18
-.L_divide_b18:
-       cmp     r1, r0, lsl #17
-       subhs   r1, r1,r0, lsl #17
-       addhs   r3, r3,r2, lsl #17
-.L_divide_b17:
-       cmp     r1, r0, lsl #16
-       subhs   r1, r1,r0, lsl #16
-       addhs   r3, r3,r2, lsl #16
-.L_divide_b16:
-       cmp     r1, r0, lsl #15
-       subhs   r1, r1,r0, lsl #15
-       addhs   r3, r3,r2, lsl #15
-.L_divide_b15:
-       cmp     r1, r0, lsl #14
-       subhs   r1, r1,r0, lsl #14
-       addhs   r3, r3,r2, lsl #14
-.L_divide_b14:
-       cmp     r1, r0, lsl #13
-       subhs   r1, r1,r0, lsl #13
-       addhs   r3, r3,r2, lsl #13
-.L_divide_b13:
-       cmp     r1, r0, lsl #12
-       subhs   r1, r1,r0, lsl #12
-       addhs   r3, r3,r2, lsl #12
-.L_divide_b12:
-       cmp     r1, r0, lsl #11
-       subhs   r1, r1,r0, lsl #11
-       addhs   r3, r3,r2, lsl #11
-.L_divide_b11:
-       cmp     r1, r0, lsl #10
-       subhs   r1, r1,r0, lsl #10
-       addhs   r3, r3,r2, lsl #10
-.L_divide_b10:
-       cmp     r1, r0, lsl #9
-       subhs   r1, r1,r0, lsl #9
-       addhs   r3, r3,r2, lsl #9
-.L_divide_b9:
-       cmp     r1, r0, lsl #8
-       subhs   r1, r1,r0, lsl #8
-       addhs   r3, r3,r2, lsl #8
-.L_divide_b8:
-       cmp     r1, r0, lsl #7
-       subhs   r1, r1,r0, lsl #7
-       addhs   r3, r3,r2, lsl #7
-.L_divide_b7:
-       cmp     r1, r0, lsl #6
-       subhs   r1, r1,r0, lsl #6
-       addhs   r3, r3,r2, lsl #6
-.L_divide_b6:
-       cmp     r1, r0, lsl #5
-       subhs   r1, r1,r0, lsl #5
-       addhs   r3, r3,r2, lsl #5
-.L_divide_b5:
-       cmp     r1, r0, lsl #4
-       subhs   r1, r1,r0, lsl #4
-       addhs   r3, r3,r2, lsl #4
-.L_divide_b4:
-       cmp     r1, r0, lsl #3
-       subhs   r1, r1,r0, lsl #3
-       addhs   r3, r3,r2, lsl #3
-.L_divide_b3:
-       cmp     r1, r0, lsl #2
-       subhs   r1, r1,r0, lsl #2
-       addhs   r3, r3,r2, lsl #2
-.L_divide_b2:
-       cmp     r1, r0, lsl #1
-       subhs   r1, r1,r0, lsl #1
-       addhs   r3, r3,r2, lsl #1
-.L_divide_b1:
-       cmp     r1, r0
-       subhs   r1, r1, r0
-       addhs   r3, r3, r2
-.L_divide_b0:
-
-       tst     ip, #0x20000000
-       bne     .L_udivide_l1
-       mov     r0, r3
-       cmp     ip, #0
-       rsbmi   r1, r1, #0
-       movs    ip, ip, lsl #1
-       bicmi   r0, r0, #0x80000000     /* Fix incase we divided 0x80000000 */
-       rsbmi   r0, r0, #0
-       mov     pc, lr
-
-.L_udivide_l1:
-       tst     ip, #0x10000000
-       mov     r1, r1, lsl #1
-       orrne   r1, r1, #1
-       mov     r3, r3, lsl #1
-       cmp     r1, r0
-       subhs   r1, r1, r0
-       addhs   r3, r3, r2
-       mov     r0, r3
-       mov     pc, lr
-END(__aeabi_idiv)
-END(__aeabi_idivmod)
-END(__divsi3)
-
diff --git a/arch/arm/ldivmod.S b/arch/arm/ldivmod.S
deleted file mode 100644
index 3c3083b..0000000
--- a/arch/arm/ldivmod.S
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: BSD-2-Clause */
-/*
- * Copyright (C) 2012 Andrew Turner
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-#define ENTRY_NP(symbol) \
-       .globl symbol;  \
-       symbol:
-
-#define END(symbol)
-
-/*
- * These calculate:
- * q = n / m 
- * With a remainer r.
- *
- * They take n in {r0, r1} and m in {r2, r3} then pass them into the
- * helper function. The hepler functions return q in {r0, r1} as
- * required by the API spec however r is returned on the stack. The
- * ABI required us to return r in {r2, r3}.
- *
- * We need to allocate 8 bytes on the stack to store r, the link
- * register, and a pointer to the space where the helper function
- * will write r to. After returning from the helper fuinction we load
- * the old link register and r from the stack and return.
- */
-ENTRY_NP(__aeabi_ldivmod)
-       sub     sp, sp, #8      /* Space for the remainder */
-       stmfd   sp!, {sp, lr}   /* Save a pointer to the above space and lr */
-       bl      __kern_ldivmod
-       ldr     lr, [sp, #4]    /* Restore lr */
-       add     sp, sp, #8      /* Move sp to the remainder value */
-       ldmfd   sp!, {r2, r3}   /* Load the remainder */
-       mov     pc, lr
-END(__aeabi_ldivmod)
-
-ENTRY_NP(__aeabi_uldivmod)
-       sub     sp, sp, #8      /* Space for the remainder */
-       stmfd   sp!, {sp, lr}   /* Save a pointer to the above space and lr */
-       bl      __qdivrem
-       ldr     lr, [sp, #4]    /* Restore lr */
-       add     sp, sp, #8      /* Move sp to the remainder value */
-       ldmfd   sp!, {r2, r3}   /* Load the remainder */
-       mov     pc, lr
-END(__aeabi_uldivmod)
diff --git a/arch/arm/ldivmod_helper.c b/arch/arm/ldivmod_helper.c
deleted file mode 100644
index 098523e..0000000
--- a/arch/arm/ldivmod_helper.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: BSD-2-Clause */
-/*
- * Copyright (C) 2012 Andrew Turner
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-#include <uk/arch/types.h>
-
-__u64 __qdivrem(__u64 u, __u64 v, __u64 *rem);
-
-#ifndef HAVE_LIBC
-__s64 __divdi3(__s64 a, __s64 b)
-{
-       __u64 ua, ub, uq;
-       int neg;
-
-       if (a < 0)
-               ua = -(__u64)a, neg = 1;
-       else
-               ua = a, neg = 0;
-       if (b < 0)
-               ub = -(__u64)b, neg ^= 1;
-       else
-               ub = b;
-       uq = __qdivrem(ua, ub, (__u64 *)0);
-       return neg ? -uq : uq;
-}
-#endif
-
-/*
- * Helper for __aeabi_ldivmod.
- * TODO: __divdi3 calls __qdivrem. We should do the same and use the
- * remainder value rather than re-calculating it.
- */
-long long __kern_ldivmod(long long, long long, long long *);
-
-long long __kern_ldivmod(long long n, long long m, long long *rem)
-{
-       long long q;
-
-       q = __divdi3(n, m); /* q = n / m */
-       *rem = n - m * q;
-
-       return q;
-}
diff --git a/arch/arm/qdivrem.c b/arch/arm/qdivrem.c
deleted file mode 100644
index e7d1471..0000000
--- a/arch/arm/qdivrem.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/*-
- * Copyright (c) 1992, 1993
- *     The Regents of the University of California.  All rights reserved.
- *
- * This software was developed by the Computer Systems Engineering group
- * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
- * contributed to Berkeley.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <uk/arch/types.h>
-#include <uk/arch/limits.h>
-
-/*
- * Multiprecision divide.  This algorithm is from Knuth vol. 2 (2nd ed),
- * section 4.3.1, pp. 257--259.
- */
-
-/*
- * From
- *     @(#)quad.h      8.1 (Berkeley) 6/4/93
- */
-
-#ifdef __BIG_ENDIAN
-#define _QUAD_HIGHWORD 0
-#define _QUAD_LOWWORD 1
-#else /* __LITTLE_ENDIAN */
-#define _QUAD_HIGHWORD 1
-#define _QUAD_LOWWORD 0
-#endif
-
-/*
- * Define high and low longwords.
- */
-#define QUADH               _QUAD_HIGHWORD
-#define QUADL               _QUAD_LOWWORD
-
-/*
- * Total number of bits in a quad_t and in the pieces that make it up.
- * These are used for shifting, and also below for halfword extraction
- * and assembly.
- */
-#define CHAR_BIT        8               /* number of bits in a char */
-#define QUAD_BITS       (sizeof(__s64) * CHAR_BIT)
-#define LONG_BITS       (sizeof(long) * CHAR_BIT)
-#define HALF_BITS       (sizeof(long) * CHAR_BIT / 2)
-
-#define DIGIT_BASE (1 << HALF_BITS) /* digit base */
-/*
- * Extract high and low shortwords from longword, and move low shortword of
- * longword to upper half of long, i.e., produce the upper longword of
- * ((quad_t)(x) << (number_of_bits_in_long/2)).  (`x' must actually be u_long.)
- *
- * These are used in the multiply code, to split a longword into upper
- * and lower halves, and to reassemble a product as a quad_t, shifted left
- * (sizeof(long)*CHAR_BIT/2).
- */
-#define HHALF(x)        ((x) >> HALF_BITS)
-#define LHALF(x)        ((x) & ((1 << HALF_BITS) - 1))
-#define LHUP(x)         ((x) << HALF_BITS)
-
-#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b))
-
-/*
- * Depending on the desired operation, we view a `long long' (aka quad_t) in
- * one or more of the following formats.
- */
-union uu {
-       __s64           q;             /* as a (signed) quad */
-       __s64          uq;             /* as an unsigned quad */
-       long           sl[2];          /* as two signed longs */
-       unsigned long  ul[2];          /* as two unsigned longs */
-};
-
-#define        B       (1 << HALF_BITS)        /* digit base */
-
-/* select a type for digits in base B: use unsigned short if they fit */
-#if __UL_MAX == 0xffffffff && __US_MAX >= 0xffff
-typedef unsigned short digit;
-#else
-typedef unsigned long digit;
-#endif
-
-/*
- * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
- * `fall out' the left (there never will be any such anyway).
- * We may assume len >= 0.  NOTE THAT THIS WRITES len+1 DIGITS.
- */
-static void __shl(register digit *p, register int len, register int sh)
-{
-       register int i;
-
-       for (i = 0; i < len; i++)
-               p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
-       p[i] = LHALF(p[i] << sh);
-}
-
-/*
- * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
- *
- * We do this in base 2-sup-HALF_BITS, so that all intermediate products
- * fit within u_long.  As a consequence, the maximum length dividend and
- * divisor are 4 `digits' in this base (they are shorter if they have
- * leading zeros).
- */
-__u64 __qdivrem(__u64 uq, __u64 vq, __u64 *arq)
-{
-       union uu tmp;
-       digit *u, *v, *q;
-       register digit v1, v2;
-       unsigned long qhat, rhat, t;
-       int m, n, d, j, i;
-       digit uspace[5], vspace[5], qspace[5];
-
-       /*
-        * Take care of special cases: divide by zero, and u < v.
-        */
-       if (vq == 0) {
-               /* divide by zero. */
-               static volatile const unsigned int zero = 0;
-
-               tmp.ul[QUADH] = tmp.ul[QUADL] = 1 / zero;
-               if (arq)
-                       *arq = uq;
-               return tmp.q;
-       }
-       if (uq < vq) {
-               if (arq)
-                       *arq = uq;
-               return 0;
-       }
-       u = &uspace[0];
-       v = &vspace[0];
-       q = &qspace[0];
-
-       /*
-        * Break dividend and divisor into digits in base B, then
-        * count leading zeros to determine m and n.  When done, we
-        * will have:
-        *      u = (u[1]u[2]...u[m+n]) sub B
-        *      v = (v[1]v[2]...v[n]) sub B
-        *      v[1] != 0
-        *      1 < n <= 4 (if n = 1, we use a different division algorithm)
-        *      m >= 0 (otherwise u < v, which we already checked)
-        *      m + n = 4
-        * and thus
-        *      m = 4 - n <= 2
-        */
-       tmp.uq = uq;
-       u[0] = 0;
-       u[1] = HHALF(tmp.ul[QUADH]);
-       u[2] = LHALF(tmp.ul[QUADH]);
-       u[3] = HHALF(tmp.ul[QUADL]);
-       u[4] = LHALF(tmp.ul[QUADL]);
-       tmp.uq = vq;
-       v[1] = HHALF(tmp.ul[QUADH]);
-       v[2] = LHALF(tmp.ul[QUADH]);
-       v[3] = HHALF(tmp.ul[QUADL]);
-       v[4] = LHALF(tmp.ul[QUADL]);
-       for (n = 4; v[1] == 0; v++) {
-               if (--n == 1) {
-                       unsigned long rbj; /* r*B+u[j] (not root boy jim) */
-                       digit q1, q2, q3, q4;
-
-                       /*
-                        * Change of plan, per exercise 16.
-                        *      r = 0;
-                        *      for j = 1..4:
-                        *              q[j] = floor((r*B + u[j]) / v),
-                        *              r = (r*B + u[j]) % v;
-                        * We unroll this completely here.
-                        */
-                       t = v[2];       /* nonzero, by definition */
-                       q1 = u[1] / t;
-                       rbj = COMBINE(u[1] % t, u[2]);
-                       q2 = rbj / t;
-                       rbj = COMBINE(rbj % t, u[3]);
-                       q3 = rbj / t;
-                       rbj = COMBINE(rbj % t, u[4]);
-                       q4 = rbj / t;
-                       if (arq)
-                               *arq = rbj % t;
-                       tmp.ul[QUADH] = COMBINE(q1, q2);
-                       tmp.ul[QUADL] = COMBINE(q3, q4);
-                       return tmp.q;
-               }
-       }
-
-       /*
-        * By adjusting q once we determine m, we can guarantee that
-        * there is a complete four-digit quotient at &qspace[1] when
-        * we finally stop.
-        */
-       for (m = 4 - n; u[1] == 0; u++)
-               m--;
-       for (i = 4 - m; --i >= 0;)
-               q[i] = 0;
-       q += 4 - m;
-
-       /*
-        * Here we run Program D, translated from MIX to C and acquiring
-        * a few minor changes.
-        *
-        * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
-        */
-       d = 0;
-       for (t = v[1]; t < B / 2; t <<= 1)
-               d++;
-       if (d > 0) {
-               __shl(&u[0], m + n, d);         /* u <<= d */
-               __shl(&v[1], n - 1, d);         /* v <<= d */
-       }
-       /*
-        * D2: j = 0.
-        */
-       j = 0;
-       v1 = v[1];      /* for D3 -- note that v[1..n] are constant */
-       v2 = v[2];      /* for D3 */
-       do {
-               register digit uj0, uj1, uj2;
-
-               /*
-                * D3: Calculate qhat (\^q, in TeX notation).
-                * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
-                * let rhat = (u[j]*B + u[j+1]) mod v[1].
-                * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
-                * decrement qhat and increase rhat correspondingly.
-                * Note that if rhat >= B, v[2]*qhat < rhat*B.
-                */
-               uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
-               uj1 = u[j + 1]; /* for D3 only */
-               uj2 = u[j + 2]; /* for D3 only */
-               if (uj0 == v1) {
-                       qhat = B;
-                       rhat = uj1;
-                       goto qhat_too_big;
-               } else {
-                       unsigned long nn = COMBINE(uj0, uj1);
-
-                       qhat = nn / v1;
-                       rhat = nn % v1;
-               }
-               while (v2 * qhat > COMBINE(rhat, uj2)) {
-qhat_too_big:
-                       qhat--;
-                       if ((rhat += v1) >= B)
-                               break;
-               }
-               /*
-                * D4: Multiply and subtract.
-                * The variable `t' holds any borrows across the loop.
-                * We split this up so that we do not require v[0] = 0,
-                * and to eliminate a final special case.
-                */
-               for (t = 0, i = n; i > 0; i--) {
-                       t = u[i + j] - v[i] * qhat - t;
-                       u[i + j] = LHALF(t);
-                       t = (B - HHALF(t)) & (B - 1);
-               }
-               t = u[j] - t;
-               u[j] = LHALF(t);
-               /*
-                * D5: test remainder.
-                * There is a borrow if and only if HHALF(t) is nonzero;
-                * in that (rare) case, qhat was too large (by exactly 1).
-                * Fix it by adding v[1..n] to u[j..j+n].
-                */
-               if (HHALF(t)) {
-                       qhat--;
-                       for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
-                               t += u[i + j] + v[i];
-                               u[i + j] = LHALF(t);
-                               t = HHALF(t);
-                       }
-                       u[j] = LHALF(u[j] + t);
-               }
-               q[j] = qhat;
-       } while (++j <= m);             /* D7: loop on j. */
-
-       /*
-        * If caller wants the remainder, we have to calculate it as
-        * u[m..m+n] >> d (this is at most n digits and thus fits in
-        * u[m+1..m+n], but we may need more source digits).
-        */
-       if (arq) {
-               if (d) {
-                       for (i = m + n; i > m; --i)
-                               u[i] = (u[i] >> d) |
-                                   LHALF(u[i - 1] << (HALF_BITS - d));
-                       u[i] = 0;
-               }
-               tmp.ul[QUADH] = COMBINE(uspace[1], uspace[2]);
-               tmp.ul[QUADL] = COMBINE(uspace[3], uspace[4]);
-               *arq = tmp.q;
-       }
-
-       tmp.ul[QUADH] = COMBINE(qspace[1], qspace[2]);
-       tmp.ul[QUADL] = COMBINE(qspace[3], qspace[4]);
-       return tmp.q;
-}
-- 
2.7.4

