Re: [Minios-devel] [UNIKRAFT PATCH 3/9] build: Move arm32 libraries to new family/architecture folder
Hi Simon,
> -----Original Message-----
> From: Simon Kuenzer <simon.kuenzer@xxxxxxxxx>
> Sent: April 5, 2018 5:28
> To: Wei Chen <Wei.Chen@xxxxxxx>; minios-devel@xxxxxxxxxxxxxxxxxxxx
> Cc: Shijie Huang <Shijie.Huang@xxxxxxx>; Kaly Xin <Kaly.Xin@xxxxxxx>; nd
> <nd@xxxxxxx>
> Subject: Re: [UNIKRAFT PATCH 3/9] build: Move arm32 libraries to new
> family/architecture folder
>
> On 15.03.2018 04:39, Wei Chen wrote:
> > We use arch/<FAMILY>/ to store the common code and build
> > scripts for architectures of the same family. The code that was
> > stored in arch/arm32 is arm32 code, so we have moved it to the
> > new folder arch/arm/arm32.
> >
> > Signed-off-by: Wei Chen <Wei.Chen@xxxxxxx>
> > ---
> >  arch/arm/Makefile.uk            |  15 +-
> >  arch/arm/arm32/divsi3.S         | 404 ++++++++++++++++++++++++++++++++++++++++
> >  arch/arm/arm32/ldivmod.S        |  68 +++++++
> >  arch/arm/arm32/ldivmod_helper.c |  67 +++++++
> >  arch/arm/arm32/qdivrem.c        | 324 ++++++++++++++++++++++++++++++++
> >  arch/arm/divsi3.S               | 404 ----------------------------------------
> >  arch/arm/ldivmod.S              |  68 -------
> >  arch/arm/ldivmod_helper.c       |  67 -------
> >  arch/arm/qdivrem.c              | 324 --------------------------------
> >  9 files changed, 874 insertions(+), 867 deletions(-)
> > create mode 100644 arch/arm/arm32/divsi3.S
> > create mode 100644 arch/arm/arm32/ldivmod.S
> > create mode 100644 arch/arm/arm32/ldivmod_helper.c
> > create mode 100644 arch/arm/arm32/qdivrem.c
> > delete mode 100644 arch/arm/divsi3.S
> > delete mode 100644 arch/arm/ldivmod.S
> > delete mode 100644 arch/arm/ldivmod_helper.c
> > delete mode 100644 arch/arm/qdivrem.c
> >
> > diff --git a/arch/arm/Makefile.uk b/arch/arm/Makefile.uk
> > index 2567dbe..780a035 100644
> > --- a/arch/arm/Makefile.uk
> > +++ b/arch/arm/Makefile.uk
> > @@ -1,3 +1,6 @@
> > +# Setup compiler flags and objects for arm32
>
> I would call it libraries in the comment instead of objects. libarmmath
> is essentially a library for ARM32. Will you need something similar for
> AARCH64?
The current libarmmath provides integer division routines for ARM32
platforms that lack hardware divide instructions. On AArch64, hardware
floating-point support is mandatory: the AArch64 floating-point
instructions handle half-, single- and double-precision values
directly. For 128-bit floats (long double), we would have to implement
routines like __divtf3 in libarmmath.
But I think AArch64 currently doesn't need libarmmath.
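For example, as far as I understand the AArch64 ABI, a trivial C
function like the one below is compiled into a call to the libgcc
helper __divtf3, because long double is a 128-bit IEEE type there.
This snippet is only an illustration, not part of the patch:

    /* On AArch64, `long double' is IEEE binary128. There is no
     * hardware quad-precision divider, so the compiler lowers this
     * division to a call to the soft-float helper __divtf3. */
    long double quad_div(long double a, long double b)
    {
            return a / b;
    }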
> If yes, keep the LIBARMMATH_SRCS-$(ARCH_ARM_32) form and do the
> addlib call for both CPU architectures. If a library does not have any
> source files defined, the build system will automatically exclude it
> from the build.
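If we do add AArch64 sources later, I understand your suggestion as
something like the sketch below. The ARCH_ARM_64 flag and the
divtf3.c file are only my assumptions, for illustration:

    $(eval $(call addlib,libarmmath))
    LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/arm32/divsi3.S
    LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/arm32/ldivmod.S
    LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/arm32/ldivmod_helper.c
    LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/arm32/qdivrem.c
    # Hypothetical: only needed if we ever implement __divtf3 etc.
    LIBARMMATH_SRCS-$(ARCH_ARM_64) += $(UK_BASE)/arch/arm/arm64/divtf3.c

That way the addlib call is shared and, as you said, the build system
drops the library for any architecture whose source list stays empty.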
>
> > +ifeq ($(UK_ARCH),arm)
> > +
> > ASFLAGS += -D__ARM_32__
> > ASFLAGS += -marm
> > CFLAGS += -D__ARM_32__
> > @@ -13,7 +16,11 @@ CFLAGS-$(MARCH_CORTEXA7) += -mcpu=cortex-a7 -mtune=cortex-a7
> >  CXXFLAGS-$(MARCH_CORTEXA7) += -mcpu=cortex-a7 -mtune=cortex-a7
> >
> > $(eval $(call addlib,libarmmath))
> > -LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/divsi3.S
> > -LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/ldivmod.S
> > -LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/ldivmod_helper.c
> > -LIBARMMATH_SRCS-$(ARCH_ARM_32) += $(UK_BASE)/arch/arm/qdivrem.c
> > +LIBARMMATH_SRCS-y += $(UK_BASE)/arch/arm/arm32/divsi3.S
> > +LIBARMMATH_SRCS-y += $(UK_BASE)/arch/arm/arm32/ldivmod.S
> > +LIBARMMATH_SRCS-y += $(UK_BASE)/arch/arm/arm32/ldivmod_helper.c
> > +LIBARMMATH_SRCS-y += $(UK_BASE)/arch/arm/arm32/qdivrem.c
> > +
> > +else
> > +$(error Target architecture ($(UK_ARCH)) is currently not supported.)
> > +endif
> > diff --git a/arch/arm/arm32/divsi3.S b/arch/arm/arm32/divsi3.S
> > new file mode 100644
> > index 0000000..8bf5ac2
> > --- /dev/null
> > +++ b/arch/arm/arm32/divsi3.S
> > @@ -0,0 +1,404 @@
> > +/* $NetBSD: divsi3.S,v 1.4 2003/04/05 23:27:15 bjh21 Exp $ */
> > +
> > +/*-
> > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> > + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> > + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> > + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> > + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> > + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> > + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> > + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> > + * SUCH DAMAGE.
> > + */
> > +
> > +#define ENTRY_NP(symbol) \
> > + .globl symbol; \
> > + symbol:
> > +
> > +#define END(symbol)
> > +
> > +/*
> > + * stack is aligned as there's a possibility of branching to L_overflow
> > + * which makes a C call
> > + */
> > +
> > +ENTRY_NP(__umodsi3)
> > + stmfd sp!, {lr}
> > + sub sp, sp, #4 /* align stack */
> > + bl .L_udivide
> > + add sp, sp, #4 /* unalign stack */
> > + mov r0, r1
> > + ldmfd sp!, {pc}
> > +END(__umodsi3)
> > +
> > +ENTRY_NP(__modsi3)
> > + stmfd sp!, {lr}
> > + sub sp, sp, #4 /* align stack */
> > + bl .L_divide
> > + add sp, sp, #4 /* unalign stack */
> > + mov r0, r1
> > + ldmfd sp!, {pc}
> > +
> > +.L_overflow:
> > + /* XXX should cause a fatal error */
> > + mvn r0, #0
> > + mov pc, lr
> > +
> > +END(__modsi3)
> > +
> > +#ifdef __ARM_EABI__
> > +ENTRY_NP(__aeabi_uidiv)
> > +ENTRY_NP(__aeabi_uidivmod)
> > +#endif
> > +ENTRY_NP(__udivsi3)
> > +.L_udivide: /* r0 = r0 / r1; r1 = r0 % r1 */
> > + eor r0, r1, r0
> > + eor r1, r0, r1
> > + eor r0, r1, r0
> > + /* r0 = r1 / r0; r1 = r1 % r0 */
> > + cmp r0, #1
> > + bcc .L_overflow
> > + beq .L_divide_l0
> > + mov ip, #0
> > + movs r1, r1
> > + bpl .L_divide_l1
> > + orr ip, ip, #0x20000000 /* ip bit 0x20000000 = -ve r1 */
> > + movs r1, r1, lsr #1
> > + orrcs ip, ip, #0x10000000 /* ip bit 0x10000000 = bit 0 of r1 */
> > + b .L_divide_l1
> > +
> > +.L_divide_l0: /* r0 == 1 */
> > + mov r0, r1
> > + mov r1, #0
> > + mov pc, lr
> > +#ifdef __ARM_EABI__
> > +END(__aeabi_uidiv)
> > +END(__aeabi_uidivmod)
> > +#endif
> > +END(__udivsi3)
> > +
> > +#ifdef __ARM_EABI__
> > +ENTRY_NP(__aeabi_idiv)
> > +ENTRY_NP(__aeabi_idivmod)
> > +#endif
> > +ENTRY_NP(__divsi3)
> > +.L_divide: /* r0 = r0 / r1; r1 = r0 % r1 */
> > + eor r0, r1, r0
> > + eor r1, r0, r1
> > + eor r0, r1, r0
> > + /* r0 = r1 / r0; r1 = r1 % r0 */
> > + cmp r0, #1
> > + bcc .L_overflow
> > + beq .L_divide_l0
> > + ands ip, r0, #0x80000000
> > + rsbmi r0, r0, #0
> > + ands r2, r1, #0x80000000
> > + eor ip, ip, r2
> > + rsbmi r1, r1, #0
> > + orr ip, r2, ip, lsr #1 /* ip bit 0x40000000 = -ve division */
> > + /* ip bit 0x80000000 = -ve remainder */
> > +
> > +.L_divide_l1:
> > + mov r2, #1
> > + mov r3, #0
> > +
> > + /*
> > + * If the highest bit of the dividend is set, we have to be
> > + * careful when shifting the divisor. Test this.
> > + */
> > + movs r1,r1
> > + bpl .L_old_code
> > +
> > + /*
> > + * At this point, the highest bit of r1 is known to be set.
> > + * We abuse this below in the tst instructions.
> > + */
> > + tst r1, r0 /*, lsl #0 */
> > + bmi .L_divide_b1
> > + tst r1, r0, lsl #1
> > + bmi .L_divide_b2
> > + tst r1, r0, lsl #2
> > + bmi .L_divide_b3
> > + tst r1, r0, lsl #3
> > + bmi .L_divide_b4
> > + tst r1, r0, lsl #4
> > + bmi .L_divide_b5
> > + tst r1, r0, lsl #5
> > + bmi .L_divide_b6
> > + tst r1, r0, lsl #6
> > + bmi .L_divide_b7
> > + tst r1, r0, lsl #7
> > + bmi .L_divide_b8
> > + tst r1, r0, lsl #8
> > + bmi .L_divide_b9
> > + tst r1, r0, lsl #9
> > + bmi .L_divide_b10
> > + tst r1, r0, lsl #10
> > + bmi .L_divide_b11
> > + tst r1, r0, lsl #11
> > + bmi .L_divide_b12
> > + tst r1, r0, lsl #12
> > + bmi .L_divide_b13
> > + tst r1, r0, lsl #13
> > + bmi .L_divide_b14
> > + tst r1, r0, lsl #14
> > + bmi .L_divide_b15
> > + tst r1, r0, lsl #15
> > + bmi .L_divide_b16
> > + tst r1, r0, lsl #16
> > + bmi .L_divide_b17
> > + tst r1, r0, lsl #17
> > + bmi .L_divide_b18
> > + tst r1, r0, lsl #18
> > + bmi .L_divide_b19
> > + tst r1, r0, lsl #19
> > + bmi .L_divide_b20
> > + tst r1, r0, lsl #20
> > + bmi .L_divide_b21
> > + tst r1, r0, lsl #21
> > + bmi .L_divide_b22
> > + tst r1, r0, lsl #22
> > + bmi .L_divide_b23
> > + tst r1, r0, lsl #23
> > + bmi .L_divide_b24
> > + tst r1, r0, lsl #24
> > + bmi .L_divide_b25
> > + tst r1, r0, lsl #25
> > + bmi .L_divide_b26
> > + tst r1, r0, lsl #26
> > + bmi .L_divide_b27
> > + tst r1, r0, lsl #27
> > + bmi .L_divide_b28
> > + tst r1, r0, lsl #28
> > + bmi .L_divide_b29
> > + tst r1, r0, lsl #29
> > + bmi .L_divide_b30
> > + tst r1, r0, lsl #30
> > + bmi .L_divide_b31
> > +/*
> > + * instead of:
> > + * tst r1, r0, lsl #31
> > + * bmi .L_divide_b32
> > + */
> > + b .L_divide_b32
> > +
> > +.L_old_code:
> > + cmp r1, r0
> > + bcc .L_divide_b0
> > + cmp r1, r0, lsl #1
> > + bcc .L_divide_b1
> > + cmp r1, r0, lsl #2
> > + bcc .L_divide_b2
> > + cmp r1, r0, lsl #3
> > + bcc .L_divide_b3
> > + cmp r1, r0, lsl #4
> > + bcc .L_divide_b4
> > + cmp r1, r0, lsl #5
> > + bcc .L_divide_b5
> > + cmp r1, r0, lsl #6
> > + bcc .L_divide_b6
> > + cmp r1, r0, lsl #7
> > + bcc .L_divide_b7
> > + cmp r1, r0, lsl #8
> > + bcc .L_divide_b8
> > + cmp r1, r0, lsl #9
> > + bcc .L_divide_b9
> > + cmp r1, r0, lsl #10
> > + bcc .L_divide_b10
> > + cmp r1, r0, lsl #11
> > + bcc .L_divide_b11
> > + cmp r1, r0, lsl #12
> > + bcc .L_divide_b12
> > + cmp r1, r0, lsl #13
> > + bcc .L_divide_b13
> > + cmp r1, r0, lsl #14
> > + bcc .L_divide_b14
> > + cmp r1, r0, lsl #15
> > + bcc .L_divide_b15
> > + cmp r1, r0, lsl #16
> > + bcc .L_divide_b16
> > + cmp r1, r0, lsl #17
> > + bcc .L_divide_b17
> > + cmp r1, r0, lsl #18
> > + bcc .L_divide_b18
> > + cmp r1, r0, lsl #19
> > + bcc .L_divide_b19
> > + cmp r1, r0, lsl #20
> > + bcc .L_divide_b20
> > + cmp r1, r0, lsl #21
> > + bcc .L_divide_b21
> > + cmp r1, r0, lsl #22
> > + bcc .L_divide_b22
> > + cmp r1, r0, lsl #23
> > + bcc .L_divide_b23
> > + cmp r1, r0, lsl #24
> > + bcc .L_divide_b24
> > + cmp r1, r0, lsl #25
> > + bcc .L_divide_b25
> > + cmp r1, r0, lsl #26
> > + bcc .L_divide_b26
> > + cmp r1, r0, lsl #27
> > + bcc .L_divide_b27
> > + cmp r1, r0, lsl #28
> > + bcc .L_divide_b28
> > + cmp r1, r0, lsl #29
> > + bcc .L_divide_b29
> > + cmp r1, r0, lsl #30
> > + bcc .L_divide_b30
> > +.L_divide_b32:
> > + cmp r1, r0, lsl #31
> > + subhs r1, r1,r0, lsl #31
> > + addhs r3, r3,r2, lsl #31
> > +.L_divide_b31:
> > + cmp r1, r0, lsl #30
> > + subhs r1, r1,r0, lsl #30
> > + addhs r3, r3,r2, lsl #30
> > +.L_divide_b30:
> > + cmp r1, r0, lsl #29
> > + subhs r1, r1,r0, lsl #29
> > + addhs r3, r3,r2, lsl #29
> > +.L_divide_b29:
> > + cmp r1, r0, lsl #28
> > + subhs r1, r1,r0, lsl #28
> > + addhs r3, r3,r2, lsl #28
> > +.L_divide_b28:
> > + cmp r1, r0, lsl #27
> > + subhs r1, r1,r0, lsl #27
> > + addhs r3, r3,r2, lsl #27
> > +.L_divide_b27:
> > + cmp r1, r0, lsl #26
> > + subhs r1, r1,r0, lsl #26
> > + addhs r3, r3,r2, lsl #26
> > +.L_divide_b26:
> > + cmp r1, r0, lsl #25
> > + subhs r1, r1,r0, lsl #25
> > + addhs r3, r3,r2, lsl #25
> > +.L_divide_b25:
> > + cmp r1, r0, lsl #24
> > + subhs r1, r1,r0, lsl #24
> > + addhs r3, r3,r2, lsl #24
> > +.L_divide_b24:
> > + cmp r1, r0, lsl #23
> > + subhs r1, r1,r0, lsl #23
> > + addhs r3, r3,r2, lsl #23
> > +.L_divide_b23:
> > + cmp r1, r0, lsl #22
> > + subhs r1, r1,r0, lsl #22
> > + addhs r3, r3,r2, lsl #22
> > +.L_divide_b22:
> > + cmp r1, r0, lsl #21
> > + subhs r1, r1,r0, lsl #21
> > + addhs r3, r3,r2, lsl #21
> > +.L_divide_b21:
> > + cmp r1, r0, lsl #20
> > + subhs r1, r1,r0, lsl #20
> > + addhs r3, r3,r2, lsl #20
> > +.L_divide_b20:
> > + cmp r1, r0, lsl #19
> > + subhs r1, r1,r0, lsl #19
> > + addhs r3, r3,r2, lsl #19
> > +.L_divide_b19:
> > + cmp r1, r0, lsl #18
> > + subhs r1, r1,r0, lsl #18
> > + addhs r3, r3,r2, lsl #18
> > +.L_divide_b18:
> > + cmp r1, r0, lsl #17
> > + subhs r1, r1,r0, lsl #17
> > + addhs r3, r3,r2, lsl #17
> > +.L_divide_b17:
> > + cmp r1, r0, lsl #16
> > + subhs r1, r1,r0, lsl #16
> > + addhs r3, r3,r2, lsl #16
> > +.L_divide_b16:
> > + cmp r1, r0, lsl #15
> > + subhs r1, r1,r0, lsl #15
> > + addhs r3, r3,r2, lsl #15
> > +.L_divide_b15:
> > + cmp r1, r0, lsl #14
> > + subhs r1, r1,r0, lsl #14
> > + addhs r3, r3,r2, lsl #14
> > +.L_divide_b14:
> > + cmp r1, r0, lsl #13
> > + subhs r1, r1,r0, lsl #13
> > + addhs r3, r3,r2, lsl #13
> > +.L_divide_b13:
> > + cmp r1, r0, lsl #12
> > + subhs r1, r1,r0, lsl #12
> > + addhs r3, r3,r2, lsl #12
> > +.L_divide_b12:
> > + cmp r1, r0, lsl #11
> > + subhs r1, r1,r0, lsl #11
> > + addhs r3, r3,r2, lsl #11
> > +.L_divide_b11:
> > + cmp r1, r0, lsl #10
> > + subhs r1, r1,r0, lsl #10
> > + addhs r3, r3,r2, lsl #10
> > +.L_divide_b10:
> > + cmp r1, r0, lsl #9
> > + subhs r1, r1,r0, lsl #9
> > + addhs r3, r3,r2, lsl #9
> > +.L_divide_b9:
> > + cmp r1, r0, lsl #8
> > + subhs r1, r1,r0, lsl #8
> > + addhs r3, r3,r2, lsl #8
> > +.L_divide_b8:
> > + cmp r1, r0, lsl #7
> > + subhs r1, r1,r0, lsl #7
> > + addhs r3, r3,r2, lsl #7
> > +.L_divide_b7:
> > + cmp r1, r0, lsl #6
> > + subhs r1, r1,r0, lsl #6
> > + addhs r3, r3,r2, lsl #6
> > +.L_divide_b6:
> > + cmp r1, r0, lsl #5
> > + subhs r1, r1,r0, lsl #5
> > + addhs r3, r3,r2, lsl #5
> > +.L_divide_b5:
> > + cmp r1, r0, lsl #4
> > + subhs r1, r1,r0, lsl #4
> > + addhs r3, r3,r2, lsl #4
> > +.L_divide_b4:
> > + cmp r1, r0, lsl #3
> > + subhs r1, r1,r0, lsl #3
> > + addhs r3, r3,r2, lsl #3
> > +.L_divide_b3:
> > + cmp r1, r0, lsl #2
> > + subhs r1, r1,r0, lsl #2
> > + addhs r3, r3,r2, lsl #2
> > +.L_divide_b2:
> > + cmp r1, r0, lsl #1
> > + subhs r1, r1,r0, lsl #1
> > + addhs r3, r3,r2, lsl #1
> > +.L_divide_b1:
> > + cmp r1, r0
> > + subhs r1, r1, r0
> > + addhs r3, r3, r2
> > +.L_divide_b0:
> > +
> > + tst ip, #0x20000000
> > + bne .L_udivide_l1
> > + mov r0, r3
> > + cmp ip, #0
> > + rsbmi r1, r1, #0
> > + movs ip, ip, lsl #1
> > + bicmi r0, r0, #0x80000000 /* Fix in case we divided 0x80000000 */
> > + rsbmi r0, r0, #0
> > + mov pc, lr
> > +
> > +.L_udivide_l1:
> > + tst ip, #0x10000000
> > + mov r1, r1, lsl #1
> > + orrne r1, r1, #1
> > + mov r3, r3, lsl #1
> > + cmp r1, r0
> > + subhs r1, r1, r0
> > + addhs r3, r3, r2
> > + mov r0, r3
> > + mov pc, lr
> > +END(__aeabi_idiv)
> > +END(__aeabi_idivmod)
> > +END(__divsi3)
> > +
> > diff --git a/arch/arm/arm32/ldivmod.S b/arch/arm/arm32/ldivmod.S
> > new file mode 100644
> > index 0000000..180227c
> > --- /dev/null
> > +++ b/arch/arm/arm32/ldivmod.S
> > @@ -0,0 +1,68 @@
> > +/* SPDX-License-Identifier: BSD-2-Clause */
> > +/*
> > + * Copyright (C) 2012 Andrew Turner
> > + * All rights reserved.
> > + *
> > + * Redistribution and use in source and binary forms, with or without
> > + * modification, are permitted provided that the following conditions
> > + * are met:
> > + * 1. Redistributions of source code must retain the above copyright
> > + * notice, this list of conditions and the following disclaimer.
> > + * 2. Redistributions in binary form must reproduce the above copyright
> > + * notice, this list of conditions and the following disclaimer in the
> > + * documentation and/or other materials provided with the distribution.
> > + *
> > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> > + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> > + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> > + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> > + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> > + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> > + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> > + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> > + * SUCH DAMAGE.
> > + *
> > + */
> > +
> > +#define ENTRY_NP(symbol) \
> > + .globl symbol; \
> > + symbol:
> > +
> > +#define END(symbol)
> > +
> > +/*
> > + * These calculate:
> > + * q = n / m
> > + * With a remainder r.
> > + *
> > + * They take n in {r0, r1} and m in {r2, r3} then pass them into the
> > + * helper function. The helper functions return q in {r0, r1} as
> > + * required by the API spec; however, r is returned on the stack. The
> > + * ABI requires us to return r in {r2, r3}.
> > + *
> > + * We need to allocate 8 bytes on the stack to store r, the link
> > + * register, and a pointer to the space where the helper function
> > + * will write r to. After returning from the helper function we load
> > + * the old link register and r from the stack and return.
> > + */
> > +ENTRY_NP(__aeabi_ldivmod)
> > + sub sp, sp, #8 /* Space for the remainder */
> > + stmfd sp!, {sp, lr} /* Save a pointer to the above space and lr */
> > + bl __kern_ldivmod
> > + ldr lr, [sp, #4] /* Restore lr */
> > + add sp, sp, #8 /* Move sp to the remainder value */
> > + ldmfd sp!, {r2, r3} /* Load the remainder */
> > + mov pc, lr
> > +END(__aeabi_ldivmod)
> > +
> > +ENTRY_NP(__aeabi_uldivmod)
> > + sub sp, sp, #8 /* Space for the remainder */
> > + stmfd sp!, {sp, lr} /* Save a pointer to the above space and lr */
> > + bl __qdivrem
> > + ldr lr, [sp, #4] /* Restore lr */
> > + add sp, sp, #8 /* Move sp to the remainder value */
> > + ldmfd sp!, {r2, r3} /* Load the remainder */
> > + mov pc, lr
> > +END(__aeabi_uldivmod)
> > diff --git a/arch/arm/arm32/ldivmod_helper.c b/arch/arm/arm32/ldivmod_helper.c
> > new file mode 100644
> > index 0000000..098523e
> > --- /dev/null
> > +++ b/arch/arm/arm32/ldivmod_helper.c
> > @@ -0,0 +1,67 @@
> > +/* SPDX-License-Identifier: BSD-2-Clause */
> > +/*
> > + * Copyright (C) 2012 Andrew Turner
> > + * All rights reserved.
> > + *
> > + * Redistribution and use in source and binary forms, with or without
> > + * modification, are permitted provided that the following conditions
> > + * are met:
> > + * 1. Redistributions of source code must retain the above copyright
> > + * notice, this list of conditions and the following disclaimer.
> > + * 2. Redistributions in binary form must reproduce the above copyright
> > + * notice, this list of conditions and the following disclaimer in the
> > + * documentation and/or other materials provided with the distribution.
> > + *
> > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> > + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> > + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> > + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> > + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> > + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> > + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> > + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> > + * SUCH DAMAGE.
> > + *
> > + */
> > +
> > +#include <uk/arch/types.h>
> > +
> > +__u64 __qdivrem(__u64 u, __u64 v, __u64 *rem);
> > +
> > +#ifndef HAVE_LIBC
> > +__s64 __divdi3(__s64 a, __s64 b)
> > +{
> > + __u64 ua, ub, uq;
> > + int neg;
> > +
> > + if (a < 0)
> > + ua = -(__u64)a, neg = 1;
> > + else
> > + ua = a, neg = 0;
> > + if (b < 0)
> > + ub = -(__u64)b, neg ^= 1;
> > + else
> > + ub = b;
> > + uq = __qdivrem(ua, ub, (__u64 *)0);
> > + return neg ? -uq : uq;
> > +}
> > +#endif
> > +
> > +/*
> > + * Helper for __aeabi_ldivmod.
> > + * TODO: __divdi3 calls __qdivrem. We should do the same and use the
> > + * remainder value rather than re-calculating it.
> > + */
> > +long long __kern_ldivmod(long long, long long, long long *);
> > +
> > +long long __kern_ldivmod(long long n, long long m, long long *rem)
> > +{
> > + long long q;
> > +
> > + q = __divdi3(n, m); /* q = n / m */
> > + *rem = n - m * q;
> > +
> > + return q;
> > +}
> > diff --git a/arch/arm/arm32/qdivrem.c b/arch/arm/arm32/qdivrem.c
> > new file mode 100644
> > index 0000000..e7d1471
> > --- /dev/null
> > +++ b/arch/arm/arm32/qdivrem.c
> > @@ -0,0 +1,324 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause */
> > +/*-
> > + * Copyright (c) 1992, 1993
> > + * The Regents of the University of California. All rights reserved.
> > + *
> > + * This software was developed by the Computer Systems Engineering group
> > + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
> > + * contributed to Berkeley.
> > + *
> > + * Redistribution and use in source and binary forms, with or without
> > + * modification, are permitted provided that the following conditions
> > + * are met:
> > + * 1. Redistributions of source code must retain the above copyright
> > + * notice, this list of conditions and the following disclaimer.
> > + * 2. Redistributions in binary form must reproduce the above copyright
> > + * notice, this list of conditions and the following disclaimer in the
> > + * documentation and/or other materials provided with the distribution.
> > + * 4. Neither the name of the University nor the names of its contributors
> > + * may be used to endorse or promote products derived from this software
> > + * without specific prior written permission.
> > + *
> > + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
> > + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
> > + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> > + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> > + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> > + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> > + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> > + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> > + * SUCH DAMAGE.
> > + */
> > +
> > +#include <uk/arch/types.h>
> > +#include <uk/arch/limits.h>
> > +
> > +/*
> > + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
> > + * section 4.3.1, pp. 257--259.
> > + */
> > +
> > +/*
> > + * From
> > + * @(#)quad.h 8.1 (Berkeley) 6/4/93
> > + */
> > +
> > +#ifdef __BIG_ENDIAN
> > +#define _QUAD_HIGHWORD 0
> > +#define _QUAD_LOWWORD 1
> > +#else /* __LITTLE_ENDIAN */
> > +#define _QUAD_HIGHWORD 1
> > +#define _QUAD_LOWWORD 0
> > +#endif
> > +
> > +/*
> > + * Define high and low longwords.
> > + */
> > +#define QUADH _QUAD_HIGHWORD
> > +#define QUADL _QUAD_LOWWORD
> > +
> > +/*
> > + * Total number of bits in a quad_t and in the pieces that make it up.
> > + * These are used for shifting, and also below for halfword extraction
> > + * and assembly.
> > + */
> > +#define CHAR_BIT 8 /* number of bits in a char */
> > +#define QUAD_BITS (sizeof(__s64) * CHAR_BIT)
> > +#define LONG_BITS (sizeof(long) * CHAR_BIT)
> > +#define HALF_BITS (sizeof(long) * CHAR_BIT / 2)
> > +
> > +#define DIGIT_BASE (1 << HALF_BITS) /* digit base */
> > +/*
> > + * Extract high and low shortwords from longword, and move low shortword of
> > + * longword to upper half of long, i.e., produce the upper longword of
> > + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.)
> > + *
> > + * These are used in the multiply code, to split a longword into upper
> > + * and lower halves, and to reassemble a product as a quad_t, shifted left
> > + * (sizeof(long)*CHAR_BIT/2).
> > + */
> > +#define HHALF(x) ((x) >> HALF_BITS)
> > +#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1))
> > +#define LHUP(x) ((x) << HALF_BITS)
> > +
> > +#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b))
> > +
> > +/*
> > + * Depending on the desired operation, we view a `long long' (aka quad_t) in
> > + * one or more of the following formats.
> > + */
> > +union uu {
> > + __s64 q; /* as a (signed) quad */
> > + __u64 uq; /* as an unsigned quad */
> > + long sl[2]; /* as two signed longs */
> > + unsigned long ul[2]; /* as two unsigned longs */
> > +};
> > +
> > +#define B (1 << HALF_BITS) /* digit base */
> > +
> > +/* select a type for digits in base B: use unsigned short if they fit */
> > +#if __UL_MAX == 0xffffffff && __US_MAX >= 0xffff
> > +typedef unsigned short digit;
> > +#else
> > +typedef unsigned long digit;
> > +#endif
> > +
> > +/*
> > + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
> > + * `fall out' the left (there never will be any such anyway).
> > + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
> > + */
> > +static void __shl(register digit *p, register int len, register int sh)
> > +{
> > + register int i;
> > +
> > + for (i = 0; i < len; i++)
> > + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
> > + p[i] = LHALF(p[i] << sh);
> > +}
> > +
> > +/*
> > + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
> > + *
> > + * We do this in base 2-sup-HALF_BITS, so that all intermediate products
> > + * fit within u_long. As a consequence, the maximum length dividend and
> > + * divisor are 4 `digits' in this base (they are shorter if they have
> > + * leading zeros).
> > + */
> > +__u64 __qdivrem(__u64 uq, __u64 vq, __u64 *arq)
> > +{
> > + union uu tmp;
> > + digit *u, *v, *q;
> > + register digit v1, v2;
> > + unsigned long qhat, rhat, t;
> > + int m, n, d, j, i;
> > + digit uspace[5], vspace[5], qspace[5];
> > +
> > + /*
> > + * Take care of special cases: divide by zero, and u < v.
> > + */
> > + if (vq == 0) {
> > + /* divide by zero. */
> > + static volatile const unsigned int zero = 0;
> > +
> > + tmp.ul[QUADH] = tmp.ul[QUADL] = 1 / zero;
> > + if (arq)
> > + *arq = uq;
> > + return tmp.q;
> > + }
> > + if (uq < vq) {
> > + if (arq)
> > + *arq = uq;
> > + return 0;
> > + }
> > + u = &uspace[0];
> > + v = &vspace[0];
> > + q = &qspace[0];
> > +
> > + /*
> > + * Break dividend and divisor into digits in base B, then
> > + * count leading zeros to determine m and n. When done, we
> > + * will have:
> > + * u = (u[1]u[2]...u[m+n]) sub B
> > + * v = (v[1]v[2]...v[n]) sub B
> > + * v[1] != 0
> > + * 1 < n <= 4 (if n = 1, we use a different division algorithm)
> > + * m >= 0 (otherwise u < v, which we already checked)
> > + * m + n = 4
> > + * and thus
> > + * m = 4 - n <= 2
> > + */
> > + tmp.uq = uq;
> > + u[0] = 0;
> > + u[1] = HHALF(tmp.ul[QUADH]);
> > + u[2] = LHALF(tmp.ul[QUADH]);
> > + u[3] = HHALF(tmp.ul[QUADL]);
> > + u[4] = LHALF(tmp.ul[QUADL]);
> > + tmp.uq = vq;
> > + v[1] = HHALF(tmp.ul[QUADH]);
> > + v[2] = LHALF(tmp.ul[QUADH]);
> > + v[3] = HHALF(tmp.ul[QUADL]);
> > + v[4] = LHALF(tmp.ul[QUADL]);
> > + for (n = 4; v[1] == 0; v++) {
> > + if (--n == 1) {
> > + unsigned long rbj; /* r*B+u[j] (not root boy jim) */
> > + digit q1, q2, q3, q4;
> > +
> > + /*
> > + * Change of plan, per exercise 16.
> > + * r = 0;
> > + * for j = 1..4:
> > + * q[j] = floor((r*B + u[j]) / v),
> > + * r = (r*B + u[j]) % v;
> > + * We unroll this completely here.
> > + */
> > + t = v[2]; /* nonzero, by definition */
> > + q1 = u[1] / t;
> > + rbj = COMBINE(u[1] % t, u[2]);
> > + q2 = rbj / t;
> > + rbj = COMBINE(rbj % t, u[3]);
> > + q3 = rbj / t;
> > + rbj = COMBINE(rbj % t, u[4]);
> > + q4 = rbj / t;
> > + if (arq)
> > + *arq = rbj % t;
> > + tmp.ul[QUADH] = COMBINE(q1, q2);
> > + tmp.ul[QUADL] = COMBINE(q3, q4);
> > + return tmp.q;
> > + }
> > + }
> > +
> > + /*
> > + * By adjusting q once we determine m, we can guarantee that
> > + * there is a complete four-digit quotient at &qspace[1] when
> > + * we finally stop.
> > + */
> > + for (m = 4 - n; u[1] == 0; u++)
> > + m--;
> > + for (i = 4 - m; --i >= 0;)
> > + q[i] = 0;
> > + q += 4 - m;
> > +
> > + /*
> > + * Here we run Program D, translated from MIX to C and acquiring
> > + * a few minor changes.
> > + *
> > + * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
> > + */
> > + d = 0;
> > + for (t = v[1]; t < B / 2; t <<= 1)
> > + d++;
> > + if (d > 0) {
> > + __shl(&u[0], m + n, d); /* u <<= d */
> > + __shl(&v[1], n - 1, d); /* v <<= d */
> > + }
> > + /*
> > + * D2: j = 0.
> > + */
> > + j = 0;
> > + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
> > + v2 = v[2]; /* for D3 */
> > + do {
> > + register digit uj0, uj1, uj2;
> > +
> > + /*
> > + * D3: Calculate qhat (\^q, in TeX notation).
> > + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
> > + * let rhat = (u[j]*B + u[j+1]) mod v[1].
> > + * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
> > + * decrement qhat and increase rhat correspondingly.
> > + * Note that if rhat >= B, v[2]*qhat < rhat*B.
> > + */
> > + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
> > + uj1 = u[j + 1]; /* for D3 only */
> > + uj2 = u[j + 2]; /* for D3 only */
> > + if (uj0 == v1) {
> > + qhat = B;
> > + rhat = uj1;
> > + goto qhat_too_big;
> > + } else {
> > + unsigned long nn = COMBINE(uj0, uj1);
> > +
> > + qhat = nn / v1;
> > + rhat = nn % v1;
> > + }
> > + while (v2 * qhat > COMBINE(rhat, uj2)) {
> > +qhat_too_big:
> > + qhat--;
> > + if ((rhat += v1) >= B)
> > + break;
> > + }
> > + /*
> > + * D4: Multiply and subtract.
> > + * The variable `t' holds any borrows across the loop.
> > + * We split this up so that we do not require v[0] = 0,
> > + * and to eliminate a final special case.
> > + */
> > + for (t = 0, i = n; i > 0; i--) {
> > + t = u[i + j] - v[i] * qhat - t;
> > + u[i + j] = LHALF(t);
> > + t = (B - HHALF(t)) & (B - 1);
> > + }
> > + t = u[j] - t;
> > + u[j] = LHALF(t);
> > + /*
> > + * D5: test remainder.
> > + * There is a borrow if and only if HHALF(t) is nonzero;
> > + * in that (rare) case, qhat was too large (by exactly 1).
> > + * Fix it by adding v[1..n] to u[j..j+n].
> > + */
> > + if (HHALF(t)) {
> > + qhat--;
> > + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
> > + t += u[i + j] + v[i];
> > + u[i + j] = LHALF(t);
> > + t = HHALF(t);
> > + }
> > + u[j] = LHALF(u[j] + t);
> > + }
> > + q[j] = qhat;
> > + } while (++j <= m); /* D7: loop on j. */
> > +
> > + /*
> > + * If caller wants the remainder, we have to calculate it as
> > + * u[m..m+n] >> d (this is at most n digits and thus fits in
> > + * u[m+1..m+n], but we may need more source digits).
> > + */
> > + if (arq) {
> > + if (d) {
> > + for (i = m + n; i > m; --i)
> > + u[i] = (u[i] >> d) |
> > + LHALF(u[i - 1] << (HALF_BITS - d));
> > + u[i] = 0;
> > + }
> > + tmp.ul[QUADH] = COMBINE(uspace[1], uspace[2]);
> > + tmp.ul[QUADL] = COMBINE(uspace[3], uspace[4]);
> > + *arq = tmp.q;
> > + }
> > +
> > + tmp.ul[QUADH] = COMBINE(qspace[1], qspace[2]);
> > + tmp.ul[QUADL] = COMBINE(qspace[3], qspace[4]);
> > + return tmp.q;
> > +}
> > diff --git a/arch/arm/divsi3.S b/arch/arm/divsi3.S
> > deleted file mode 100644
> > index fa92233..0000000
> > --- a/arch/arm/divsi3.S
> > +++ /dev/null
> > @@ -1,404 +0,0 @@
> > -/* $NetBSD: divsi3.S,v 1.4 2003/04/05 23:27:15 bjh21 Exp $ */
> > -
> > -/*-
> > - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> > - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> > - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> > - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> > - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> > - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> > - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> > - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> > - * SUCH DAMAGE.
> > - */
> > -
> > -#define ENTRY_NP(symbol) \
> > - .globl symbol; \
> > - symbol:
> > -
> > -#define END(symbol)
> > -
> > -/*
> > - * stack is aligned as there's a possibility of branching to L_overflow
> > - * which makes a C call
> > - */
> > -
> > -ENTRY_NP(__umodsi3)
> > - stmfd sp!, {lr}
> > - sub sp, sp, #4 /* align stack */
> > - bl .L_udivide
> > - add sp, sp, #4 /* unalign stack */
> > - mov r0, r1
> > - ldmfd sp!, {pc}
> > -END(__umodsi3)
> > -
> > -ENTRY_NP(__modsi3)
> > - stmfd sp!, {lr}
> > - sub sp, sp, #4 /* align stack */
> > - bl .L_divide
> > - add sp, sp, #4 /* unalign stack */
> > - mov r0, r1
> > - ldmfd sp!, {pc}
> > -
> > -.L_overflow:
> > - /* XXX should cause a fatal error */
> > - mvn r0, #0
> > - mov pc, lr
> > -
> > -END(__modsi3)
> > -
> > -#ifdef __ARM_EABI__
> > -ENTRY_NP(__aeabi_uidiv)
> > -ENTRY_NP(__aeabi_uidivmod)
> > -#endif
> > -ENTRY_NP(__udivsi3)
> > -.L_udivide: /* r0 = r0 / r1; r1 = r0 % r1 */
> > - eor r0, r1, r0
> > - eor r1, r0, r1
> > - eor r0, r1, r0
> > - /* r0 = r1 / r0; r1 = r1 % r0 */
> > - cmp r0, #1
> > - bcc .L_overflow
> > - beq .L_divide_l0
> > - mov ip, #0
> > - movs r1, r1
> > - bpl .L_divide_l1
> > - orr ip, ip, #0x20000000 /* ip bit 0x20000000 = -ve r1 */
> > - movs r1, r1, lsr #1
> > - orrcs ip, ip, #0x10000000 /* ip bit 0x10000000 = bit 0 of r1 */
> > - b .L_divide_l1
> > -
> > -.L_divide_l0: /* r0 == 1 */
> > - mov r0, r1
> > - mov r1, #0
> > - mov pc, lr
> > -#ifdef __ARM_EABI__
> > -END(__aeabi_uidiv)
> > -END(__aeabi_uidivmod)
> > -#endif
> > -END(__udivsi3)
> > -
> > -#ifdef __ARM_EABI__
> > -ENTRY_NP(__aeabi_idiv)
> > -ENTRY_NP(__aeabi_idivmod)
> > -#endif
> > -ENTRY_NP(__divsi3)
> > -.L_divide: /* r0 = r0 / r1; r1 = r0 % r1 */
> > - eor r0, r1, r0
> > - eor r1, r0, r1
> > - eor r0, r1, r0
> > - /* r0 = r1 / r0; r1 = r1 % r0 */
> > - cmp r0, #1
> > - bcc .L_overflow
> > - beq .L_divide_l0
> > - ands ip, r0, #0x80000000
> > - rsbmi r0, r0, #0
> > - ands r2, r1, #0x80000000
> > - eor ip, ip, r2
> > - rsbmi r1, r1, #0
> > - orr ip, r2, ip, lsr #1 /* ip bit 0x40000000 = -ve division */
> > - /* ip bit 0x80000000 = -ve remainder */
> > -
> > -.L_divide_l1:
> > - mov r2, #1
> > - mov r3, #0
> > -
> > - /*
> > - * If the highest bit of the dividend is set, we have to be
> > - * careful when shifting the divisor. Test this.
> > - */
> > - movs r1,r1
> > - bpl .L_old_code
> > -
> > - /*
> > - * At this point, the highest bit of r1 is known to be set.
> > - * We abuse this below in the tst instructions.
> > - */
> > - tst r1, r0 /*, lsl #0 */
> > - bmi .L_divide_b1
> > - tst r1, r0, lsl #1
> > - bmi .L_divide_b2
> > - tst r1, r0, lsl #2
> > - bmi .L_divide_b3
> > - tst r1, r0, lsl #3
> > - bmi .L_divide_b4
> > - tst r1, r0, lsl #4
> > - bmi .L_divide_b5
> > - tst r1, r0, lsl #5
> > - bmi .L_divide_b6
> > - tst r1, r0, lsl #6
> > - bmi .L_divide_b7
> > - tst r1, r0, lsl #7
> > - bmi .L_divide_b8
> > - tst r1, r0, lsl #8
> > - bmi .L_divide_b9
> > - tst r1, r0, lsl #9
> > - bmi .L_divide_b10
> > - tst r1, r0, lsl #10
> > - bmi .L_divide_b11
> > - tst r1, r0, lsl #11
> > - bmi .L_divide_b12
> > - tst r1, r0, lsl #12
> > - bmi .L_divide_b13
> > - tst r1, r0, lsl #13
> > - bmi .L_divide_b14
> > - tst r1, r0, lsl #14
> > - bmi .L_divide_b15
> > - tst r1, r0, lsl #15
> > - bmi .L_divide_b16
> > - tst r1, r0, lsl #16
> > - bmi .L_divide_b17
> > - tst r1, r0, lsl #17
> > - bmi .L_divide_b18
> > - tst r1, r0, lsl #18
> > - bmi .L_divide_b19
> > - tst r1, r0, lsl #19
> > - bmi .L_divide_b20
> > - tst r1, r0, lsl #20
> > - bmi .L_divide_b21
> > - tst r1, r0, lsl #21
> > - bmi .L_divide_b22
> > - tst r1, r0, lsl #22
> > - bmi .L_divide_b23
> > - tst r1, r0, lsl #23
> > - bmi .L_divide_b24
> > - tst r1, r0, lsl #24
> > - bmi .L_divide_b25
> > - tst r1, r0, lsl #25
> > - bmi .L_divide_b26
> > - tst r1, r0, lsl #26
> > - bmi .L_divide_b27
> > - tst r1, r0, lsl #27
> > - bmi .L_divide_b28
> > - tst r1, r0, lsl #28
> > - bmi .L_divide_b29
> > - tst r1, r0, lsl #29
> > - bmi .L_divide_b30
> > - tst r1, r0, lsl #30
> > - bmi .L_divide_b31
> > -/*
> > - * instead of:
> > - * tst r1, r0, lsl #31
> > - * bmi .L_divide_b32
> > - */
> > - b .L_divide_b32
> > -
> > -.L_old_code:
> > - cmp r1, r0
> > - bcc .L_divide_b0
> > - cmp r1, r0, lsl #1
> > - bcc .L_divide_b1
> > - cmp r1, r0, lsl #2
> > - bcc .L_divide_b2
> > - cmp r1, r0, lsl #3
> > - bcc .L_divide_b3
> > - cmp r1, r0, lsl #4
> > - bcc .L_divide_b4
> > - cmp r1, r0, lsl #5
> > - bcc .L_divide_b5
> > - cmp r1, r0, lsl #6
> > - bcc .L_divide_b6
> > - cmp r1, r0, lsl #7
> > - bcc .L_divide_b7
> > - cmp r1, r0, lsl #8
> > - bcc .L_divide_b8
> > - cmp r1, r0, lsl #9
> > - bcc .L_divide_b9
> > - cmp r1, r0, lsl #10
> > - bcc .L_divide_b10
> > - cmp r1, r0, lsl #11
> > - bcc .L_divide_b11
> > - cmp r1, r0, lsl #12
> > - bcc .L_divide_b12
> > - cmp r1, r0, lsl #13
> > - bcc .L_divide_b13
> > - cmp r1, r0, lsl #14
> > - bcc .L_divide_b14
> > - cmp r1, r0, lsl #15
> > - bcc .L_divide_b15
> > - cmp r1, r0, lsl #16
> > - bcc .L_divide_b16
> > - cmp r1, r0, lsl #17
> > - bcc .L_divide_b17
> > - cmp r1, r0, lsl #18
> > - bcc .L_divide_b18
> > - cmp r1, r0, lsl #19
> > - bcc .L_divide_b19
> > - cmp r1, r0, lsl #20
> > - bcc .L_divide_b20
> > - cmp r1, r0, lsl #21
> > - bcc .L_divide_b21
> > - cmp r1, r0, lsl #22
> > - bcc .L_divide_b22
> > - cmp r1, r0, lsl #23
> > - bcc .L_divide_b23
> > - cmp r1, r0, lsl #24
> > - bcc .L_divide_b24
> > - cmp r1, r0, lsl #25
> > - bcc .L_divide_b25
> > - cmp r1, r0, lsl #26
> > - bcc .L_divide_b26
> > - cmp r1, r0, lsl #27
> > - bcc .L_divide_b27
> > - cmp r1, r0, lsl #28
> > - bcc .L_divide_b28
> > - cmp r1, r0, lsl #29
> > - bcc .L_divide_b29
> > - cmp r1, r0, lsl #30
> > - bcc .L_divide_b30
> > -.L_divide_b32:
> > - cmp r1, r0, lsl #31
> > - subhs r1, r1,r0, lsl #31
> > - addhs r3, r3,r2, lsl #31
> > -.L_divide_b31:
> > - cmp r1, r0, lsl #30
> > - subhs r1, r1,r0, lsl #30
> > - addhs r3, r3,r2, lsl #30
> > -.L_divide_b30:
> > - cmp r1, r0, lsl #29
> > - subhs r1, r1,r0, lsl #29
> > - addhs r3, r3,r2, lsl #29
> > -.L_divide_b29:
> > - cmp r1, r0, lsl #28
> > - subhs r1, r1,r0, lsl #28
> > - addhs r3, r3,r2, lsl #28
> > -.L_divide_b28:
> > - cmp r1, r0, lsl #27
> > - subhs r1, r1,r0, lsl #27
> > - addhs r3, r3,r2, lsl #27
> > -.L_divide_b27:
> > - cmp r1, r0, lsl #26
> > - subhs r1, r1,r0, lsl #26
> > - addhs r3, r3,r2, lsl #26
> > -.L_divide_b26:
> > - cmp r1, r0, lsl #25
> > - subhs r1, r1,r0, lsl #25
> > - addhs r3, r3,r2, lsl #25
> > -.L_divide_b25:
> > - cmp r1, r0, lsl #24
> > - subhs r1, r1,r0, lsl #24
> > - addhs r3, r3,r2, lsl #24
> > -.L_divide_b24:
> > - cmp r1, r0, lsl #23
> > - subhs r1, r1,r0, lsl #23
> > - addhs r3, r3,r2, lsl #23
> > -.L_divide_b23:
> > - cmp r1, r0, lsl #22
> > - subhs r1, r1,r0, lsl #22
> > - addhs r3, r3,r2, lsl #22
> > -.L_divide_b22:
> > - cmp r1, r0, lsl #21
> > - subhs r1, r1,r0, lsl #21
> > - addhs r3, r3,r2, lsl #21
> > -.L_divide_b21:
> > - cmp r1, r0, lsl #20
> > - subhs r1, r1,r0, lsl #20
> > - addhs r3, r3,r2, lsl #20
> > -.L_divide_b20:
> > - cmp r1, r0, lsl #19
> > - subhs r1, r1,r0, lsl #19
> > - addhs r3, r3,r2, lsl #19
> > -.L_divide_b19:
> > - cmp r1, r0, lsl #18
> > - subhs r1, r1,r0, lsl #18
> > - addhs r3, r3,r2, lsl #18
> > -.L_divide_b18:
> > - cmp r1, r0, lsl #17
> > - subhs r1, r1,r0, lsl #17
> > - addhs r3, r3,r2, lsl #17
> > -.L_divide_b17:
> > - cmp r1, r0, lsl #16
> > - subhs r1, r1,r0, lsl #16
> > - addhs r3, r3,r2, lsl #16
> > -.L_divide_b16:
> > - cmp r1, r0, lsl #15
> > - subhs r1, r1,r0, lsl #15
> > - addhs r3, r3,r2, lsl #15
> > -.L_divide_b15:
> > - cmp r1, r0, lsl #14
> > - subhs r1, r1,r0, lsl #14
> > - addhs r3, r3,r2, lsl #14
> > -.L_divide_b14:
> > - cmp r1, r0, lsl #13
> > - subhs r1, r1,r0, lsl #13
> > - addhs r3, r3,r2, lsl #13
> > -.L_divide_b13:
> > - cmp r1, r0, lsl #12
> > - subhs r1, r1,r0, lsl #12
> > - addhs r3, r3,r2, lsl #12
> > -.L_divide_b12:
> > - cmp r1, r0, lsl #11
> > - subhs r1, r1,r0, lsl #11
> > - addhs r3, r3,r2, lsl #11
> > -.L_divide_b11:
> > - cmp r1, r0, lsl #10
> > - subhs r1, r1,r0, lsl #10
> > - addhs r3, r3,r2, lsl #10
> > -.L_divide_b10:
> > - cmp r1, r0, lsl #9
> > - subhs r1, r1,r0, lsl #9
> > - addhs r3, r3,r2, lsl #9
> > -.L_divide_b9:
> > - cmp r1, r0, lsl #8
> > - subhs r1, r1,r0, lsl #8
> > - addhs r3, r3,r2, lsl #8
> > -.L_divide_b8:
> > - cmp r1, r0, lsl #7
> > - subhs r1, r1,r0, lsl #7
> > - addhs r3, r3,r2, lsl #7
> > -.L_divide_b7:
> > - cmp r1, r0, lsl #6
> > - subhs r1, r1,r0, lsl #6
> > - addhs r3, r3,r2, lsl #6
> > -.L_divide_b6:
> > - cmp r1, r0, lsl #5
> > - subhs r1, r1,r0, lsl #5
> > - addhs r3, r3,r2, lsl #5
> > -.L_divide_b5:
> > - cmp r1, r0, lsl #4
> > - subhs r1, r1,r0, lsl #4
> > - addhs r3, r3,r2, lsl #4
> > -.L_divide_b4:
> > - cmp r1, r0, lsl #3
> > - subhs r1, r1,r0, lsl #3
> > - addhs r3, r3,r2, lsl #3
> > -.L_divide_b3:
> > - cmp r1, r0, lsl #2
> > - subhs r1, r1,r0, lsl #2
> > - addhs r3, r3,r2, lsl #2
> > -.L_divide_b2:
> > - cmp r1, r0, lsl #1
> > - subhs r1, r1,r0, lsl #1
> > - addhs r3, r3,r2, lsl #1
> > -.L_divide_b1:
> > - cmp r1, r0
> > - subhs r1, r1, r0
> > - addhs r3, r3, r2
> > -.L_divide_b0:
> > -
> > - tst ip, #0x20000000
> > - bne .L_udivide_l1
> > - mov r0, r3
> > - cmp ip, #0
> > - rsbmi r1, r1, #0
> > - movs ip, ip, lsl #1
> > - bicmi r0, r0, #0x80000000 /* Fix in case we divided 0x80000000 */
> > - rsbmi r0, r0, #0
> > - mov pc, lr
> > -
> > -.L_udivide_l1:
> > - tst ip, #0x10000000
> > - mov r1, r1, lsl #1
> > - orrne r1, r1, #1
> > - mov r3, r3, lsl #1
> > - cmp r1, r0
> > - subhs r1, r1, r0
> > - addhs r3, r3, r2
> > - mov r0, r3
> > - mov pc, lr
> > -END(__aeabi_idiv)
> > -END(__aeabi_idivmod)
> > -END(__divsi3)
> > -
> > diff --git a/arch/arm/ldivmod.S b/arch/arm/ldivmod.S
> > deleted file mode 100644
> > index 3c3083b..0000000
> > --- a/arch/arm/ldivmod.S
> > +++ /dev/null
> > @@ -1,68 +0,0 @@
> > -/* SPDX-License-Identifier: BSD-2-Clause */
> > -/*
> > - * Copyright (C) 2012 Andrew Turner
> > - * All rights reserved.
> > - *
> > - * Redistribution and use in source and binary forms, with or without
> > - * modification, are permitted provided that the following conditions
> > - * are met:
> > - * 1. Redistributions of source code must retain the above copyright
> > - * notice, this list of conditions and the following disclaimer.
> > - * 2. Redistributions in binary form must reproduce the above copyright
> > - * notice, this list of conditions and the following disclaimer in the
> > - * documentation and/or other materials provided with the distribution.
> > - *
> > - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> > - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> > - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> > - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> > - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> > - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> > - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> > - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> > - * SUCH DAMAGE.
> > - *
> > - */
> > -
> > -#define ENTRY_NP(symbol) \
> > - .globl symbol; \
> > - symbol:
> > -
> > -#define END(symbol)
> > -
> > -/*
> > - * These calculate:
> > - * q = n / m
> > - * With a remainder r.
> > - *
> > - * They take n in {r0, r1} and m in {r2, r3} then pass them into the
> > - * helper function. The helper functions return q in {r0, r1} as
> > - * required by the API spec; however, r is returned on the stack. The
> > - * ABI requires us to return r in {r2, r3}.
> > - *
> > - * We need to allocate 8 bytes on the stack to store r, the link
> > - * register, and a pointer to the space where the helper function
> > - * will write r to. After returning from the helper function we load
> > - * the old link register and r from the stack and return.
> > - */
> > -ENTRY_NP(__aeabi_ldivmod)
> > - sub sp, sp, #8 /* Space for the remainder */
> > - stmfd sp!, {sp, lr} /* Save a pointer to the above space and lr */
> > - bl __kern_ldivmod
> > - ldr lr, [sp, #4] /* Restore lr */
> > - add sp, sp, #8 /* Move sp to the remainder value */
> > - ldmfd sp!, {r2, r3} /* Load the remainder */
> > - mov pc, lr
> > -END(__aeabi_ldivmod)
> > -
> > -ENTRY_NP(__aeabi_uldivmod)
> > - sub sp, sp, #8 /* Space for the remainder */
> > - stmfd sp!, {sp, lr} /* Save a pointer to the above space and lr */
> > - bl __qdivrem
> > - ldr lr, [sp, #4] /* Restore lr */
> > - add sp, sp, #8 /* Move sp to the remainder value */
> > - ldmfd sp!, {r2, r3} /* Load the remainder */
> > - mov pc, lr
> > -END(__aeabi_uldivmod)
> > diff --git a/arch/arm/ldivmod_helper.c b/arch/arm/ldivmod_helper.c
> > deleted file mode 100644
> > index 098523e..0000000
> > --- a/arch/arm/ldivmod_helper.c
> > +++ /dev/null
> > @@ -1,67 +0,0 @@
> > -/* SPDX-License-Identifier: BSD-2-Clause */
> > -/*
> > - * Copyright (C) 2012 Andrew Turner
> > - * All rights reserved.
> > - *
> > - * Redistribution and use in source and binary forms, with or without
> > - * modification, are permitted provided that the following conditions
> > - * are met:
> > - * 1. Redistributions of source code must retain the above copyright
> > - * notice, this list of conditions and the following disclaimer.
> > - * 2. Redistributions in binary form must reproduce the above copyright
> > - * notice, this list of conditions and the following disclaimer in the
> > - * documentation and/or other materials provided with the distribution.
> > - *
> > - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> > - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> > - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> > - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> > - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> > - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> > - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> > - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> > - * SUCH DAMAGE.
> > - *
> > - */
> > -
> > -#include <uk/arch/types.h>
> > -
> > -__u64 __qdivrem(__u64 u, __u64 v, __u64 *rem);
> > -
> > -#ifndef HAVE_LIBC
> > -__s64 __divdi3(__s64 a, __s64 b)
> > -{
> > - __u64 ua, ub, uq;
> > - int neg;
> > -
> > - if (a < 0)
> > - ua = -(__u64)a, neg = 1;
> > - else
> > - ua = a, neg = 0;
> > - if (b < 0)
> > - ub = -(__u64)b, neg ^= 1;
> > - else
> > - ub = b;
> > - uq = __qdivrem(ua, ub, (__u64 *)0);
> > - return neg ? -uq : uq;
> > -}
> > -#endif
> > -
> > -/*
> > - * Helper for __aeabi_ldivmod.
> > - * TODO: __divdi3 calls __qdivrem. We should do the same and use the
> > - * remainder value rather than re-calculating it.
> > - */
> > -long long __kern_ldivmod(long long, long long, long long *);
> > -
> > -long long __kern_ldivmod(long long n, long long m, long long *rem)
> > -{
> > - long long q;
> > -
> > - q = __divdi3(n, m); /* q = n / m */
> > - *rem = n - m * q;
> > -
> > - return q;
> > -}
> > diff --git a/arch/arm/qdivrem.c b/arch/arm/qdivrem.c
> > deleted file mode 100644
> > index e7d1471..0000000
> > --- a/arch/arm/qdivrem.c
> > +++ /dev/null
> > @@ -1,324 +0,0 @@
> > -/* SPDX-License-Identifier: BSD-3-Clause */
> > -/*-
> > - * Copyright (c) 1992, 1993
> > - * The Regents of the University of California. All rights reserved.
> > - *
> > - * This software was developed by the Computer Systems Engineering group
> > - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
> > - * contributed to Berkeley.
> > - *
> > - * Redistribution and use in source and binary forms, with or without
> > - * modification, are permitted provided that the following conditions
> > - * are met:
> > - * 1. Redistributions of source code must retain the above copyright
> > - * notice, this list of conditions and the following disclaimer.
> > - * 2. Redistributions in binary form must reproduce the above copyright
> > - * notice, this list of conditions and the following disclaimer in the
> > - * documentation and/or other materials provided with the distribution.
> > - * 4. Neither the name of the University nor the names of its contributors
> > - * may be used to endorse or promote products derived from this software
> > - * without specific prior written permission.
> > - *
> > - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
> > - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> > - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> > - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
> > - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> > - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> > - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> > - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> > - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> > - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> > - * SUCH DAMAGE.
> > - */
> > -
> > -#include <uk/arch/types.h>
> > -#include <uk/arch/limits.h>
> > -
> > -/*
> > - * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
> > - * section 4.3.1, pp. 257--259.
> > - */
> > -
> > -/*
> > - * From
> > - * @(#)quad.h 8.1 (Berkeley) 6/4/93
> > - */
> > -
> > -#ifdef __BIG_ENDIAN
> > -#define _QUAD_HIGHWORD 0
> > -#define _QUAD_LOWWORD 1
> > -#else /* __LITTLE_ENDIAN */
> > -#define _QUAD_HIGHWORD 1
> > -#define _QUAD_LOWWORD 0
> > -#endif
> > -
> > -/*
> > - * Define high and low longwords.
> > - */
> > -#define QUADH _QUAD_HIGHWORD
> > -#define QUADL _QUAD_LOWWORD
> > -
> > -/*
> > - * Total number of bits in a quad_t and in the pieces that make it up.
> > - * These are used for shifting, and also below for halfword extraction
> > - * and assembly.
> > - */
> > -#define CHAR_BIT 8 /* number of bits in a char */
> > -#define QUAD_BITS (sizeof(__s64) * CHAR_BIT)
> > -#define LONG_BITS (sizeof(long) * CHAR_BIT)
> > -#define HALF_BITS (sizeof(long) * CHAR_BIT / 2)
> > -
> > -#define DIGIT_BASE (1 << HALF_BITS) /* digit base */
> > -/*
> > - * Extract high and low shortwords from longword, and move low shortword of
> > - * longword to upper half of long, i.e., produce the upper longword of
> > - * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.)
> > - *
> > - * These are used in the multiply code, to split a longword into upper
> > - * and lower halves, and to reassemble a product as a quad_t, shifted left
> > - * (sizeof(long)*CHAR_BIT/2).
> > - */
> > -#define HHALF(x) ((x) >> HALF_BITS)
> > -#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1))
> > -#define LHUP(x) ((x) << HALF_BITS)
> > -
> > -#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b))
> > -
> > -/*
> > - * Depending on the desired operation, we view a `long long' (aka quad_t) in
> > - * one or more of the following formats.
> > - */
> > -union uu {
> > - __s64 q; /* as a (signed) quad */
> > - __u64 uq; /* as an unsigned quad */
> > - long sl[2]; /* as two signed longs */
> > - unsigned long ul[2]; /* as two unsigned longs */
> > -};
> > -
> > -#define B (1 << HALF_BITS) /* digit base */
> > -
> > -/* select a type for digits in base B: use unsigned short if they fit */
> > -#if __UL_MAX == 0xffffffff && __US_MAX >= 0xffff
> > -typedef unsigned short digit;
> > -#else
> > -typedef unsigned long digit;
> > -#endif
> > -
> > -/*
> > - * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
> > - * `fall out' the left (there never will be any such anyway).
> > - * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
> > - */
> > -static void __shl(register digit *p, register int len, register int sh)
> > -{
> > - register int i;
> > -
> > - for (i = 0; i < len; i++)
> > - p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
> > - p[i] = LHALF(p[i] << sh);
> > -}
> > -
> > -/*
> > - * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
> > - *
> > - * We do this in base 2-sup-HALF_BITS, so that all intermediate products
> > - * fit within u_long. As a consequence, the maximum length dividend and
> > - * divisor are 4 `digits' in this base (they are shorter if they have
> > - * leading zeros).
> > - */
> > -__u64 __qdivrem(__u64 uq, __u64 vq, __u64 *arq)
> > -{
> > - union uu tmp;
> > - digit *u, *v, *q;
> > - register digit v1, v2;
> > - unsigned long qhat, rhat, t;
> > - int m, n, d, j, i;
> > - digit uspace[5], vspace[5], qspace[5];
> > -
> > - /*
> > - * Take care of special cases: divide by zero, and u < v.
> > - */
> > - if (vq == 0) {
> > - /* divide by zero. */
> > - static volatile const unsigned int zero = 0;
> > -
> > - tmp.ul[QUADH] = tmp.ul[QUADL] = 1 / zero;
> > - if (arq)
> > - *arq = uq;
> > - return tmp.q;
> > - }
> > - if (uq < vq) {
> > - if (arq)
> > - *arq = uq;
> > - return 0;
> > - }
> > - u = &uspace[0];
> > - v = &vspace[0];
> > - q = &qspace[0];
> > -
> > - /*
> > - * Break dividend and divisor into digits in base B, then
> > - * count leading zeros to determine m and n. When done, we
> > - * will have:
> > - * u = (u[1]u[2]...u[m+n]) sub B
> > - * v = (v[1]v[2]...v[n]) sub B
> > - * v[1] != 0
> > - * 1 < n <= 4 (if n = 1, we use a different division algorithm)
> > - * m >= 0 (otherwise u < v, which we already checked)
> > - * m + n = 4
> > - * and thus
> > - * m = 4 - n <= 2
> > - */
> > - tmp.uq = uq;
> > - u[0] = 0;
> > - u[1] = HHALF(tmp.ul[QUADH]);
> > - u[2] = LHALF(tmp.ul[QUADH]);
> > - u[3] = HHALF(tmp.ul[QUADL]);
> > - u[4] = LHALF(tmp.ul[QUADL]);
> > - tmp.uq = vq;
> > - v[1] = HHALF(tmp.ul[QUADH]);
> > - v[2] = LHALF(tmp.ul[QUADH]);
> > - v[3] = HHALF(tmp.ul[QUADL]);
> > - v[4] = LHALF(tmp.ul[QUADL]);
> > - for (n = 4; v[1] == 0; v++) {
> > - if (--n == 1) {
> > - unsigned long rbj; /* r*B+u[j] (not root boy jim) */
> > - digit q1, q2, q3, q4;
> > -
> > - /*
> > - * Change of plan, per exercise 16.
> > - * r = 0;
> > - * for j = 1..4:
> > - * q[j] = floor((r*B + u[j]) / v),
> > - * r = (r*B + u[j]) % v;
> > - * We unroll this completely here.
> > - */
> > - t = v[2]; /* nonzero, by definition */
> > - q1 = u[1] / t;
> > - rbj = COMBINE(u[1] % t, u[2]);
> > - q2 = rbj / t;
> > - rbj = COMBINE(rbj % t, u[3]);
> > - q3 = rbj / t;
> > - rbj = COMBINE(rbj % t, u[4]);
> > - q4 = rbj / t;
> > - if (arq)
> > - *arq = rbj % t;
> > - tmp.ul[QUADH] = COMBINE(q1, q2);
> > - tmp.ul[QUADL] = COMBINE(q3, q4);
> > - return tmp.q;
> > - }
> > - }
> > -
> > - /*
> > - * By adjusting q once we determine m, we can guarantee that
> > - * there is a complete four-digit quotient at &qspace[1] when
> > - * we finally stop.
> > - */
> > - for (m = 4 - n; u[1] == 0; u++)
> > - m--;
> > - for (i = 4 - m; --i >= 0;)
> > - q[i] = 0;
> > - q += 4 - m;
> > -
> > - /*
> > - * Here we run Program D, translated from MIX to C and acquiring
> > - * a few minor changes.
> > - *
> > - * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
> > - */
> > - d = 0;
> > - for (t = v[1]; t < B / 2; t <<= 1)
> > - d++;
> > - if (d > 0) {
> > - __shl(&u[0], m + n, d); /* u <<= d */
> > - __shl(&v[1], n - 1, d); /* v <<= d */
> > - }
> > - /*
> > - * D2: j = 0.
> > - */
> > - j = 0;
> > - v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
> > - v2 = v[2]; /* for D3 */
> > - do {
> > - register digit uj0, uj1, uj2;
> > -
> > - /*
> > - * D3: Calculate qhat (\^q, in TeX notation).
> > - * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
> > - * let rhat = (u[j]*B + u[j+1]) mod v[1].
> > - * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
> > - * decrement qhat and increase rhat correspondingly.
> > - * Note that if rhat >= B, v[2]*qhat < rhat*B.
> > - */
> > - uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
> > - uj1 = u[j + 1]; /* for D3 only */
> > - uj2 = u[j + 2]; /* for D3 only */
> > - if (uj0 == v1) {
> > - qhat = B;
> > - rhat = uj1;
> > - goto qhat_too_big;
> > - } else {
> > - unsigned long nn = COMBINE(uj0, uj1);
> > -
> > - qhat = nn / v1;
> > - rhat = nn % v1;
> > - }
> > - while (v2 * qhat > COMBINE(rhat, uj2)) {
> > -qhat_too_big:
> > - qhat--;
> > - if ((rhat += v1) >= B)
> > - break;
> > - }
> > - /*
> > - * D4: Multiply and subtract.
> > - * The variable `t' holds any borrows across the loop.
> > - * We split this up so that we do not require v[0] = 0,
> > - * and to eliminate a final special case.
> > - */
> > - for (t = 0, i = n; i > 0; i--) {
> > - t = u[i + j] - v[i] * qhat - t;
> > - u[i + j] = LHALF(t);
> > - t = (B - HHALF(t)) & (B - 1);
> > - }
> > - t = u[j] - t;
> > - u[j] = LHALF(t);
> > - /*
> > - * D5: test remainder.
> > - * There is a borrow if and only if HHALF(t) is nonzero;
> > - * in that (rare) case, qhat was too large (by exactly 1).
> > - * Fix it by adding v[1..n] to u[j..j+n].
> > - */
> > - if (HHALF(t)) {
> > - qhat--;
> > - for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
> > - t += u[i + j] + v[i];
> > - u[i + j] = LHALF(t);
> > - t = HHALF(t);
> > - }
> > - u[j] = LHALF(u[j] + t);
> > - }
> > - q[j] = qhat;
> > - } while (++j <= m); /* D7: loop on j. */
> > -
> > - /*
> > - * If caller wants the remainder, we have to calculate it as
> > - * u[m..m+n] >> d (this is at most n digits and thus fits in
> > - * u[m+1..m+n], but we may need more source digits).
> > - */
> > - if (arq) {
> > - if (d) {
> > - for (i = m + n; i > m; --i)
> > - u[i] = (u[i] >> d) |
> > - LHALF(u[i - 1] << (HALF_BITS - d));
> > - u[i] = 0;
> > - }
> > - tmp.ul[QUADH] = COMBINE(uspace[1], uspace[2]);
> > - tmp.ul[QUADL] = COMBINE(uspace[3], uspace[4]);
> > - *arq = tmp.q;
> > - }
> > -
> > - tmp.ul[QUADH] = COMBINE(qspace[1], qspace[2]);
> > - tmp.ul[QUADL] = COMBINE(qspace[3], qspace[4]);
> > - return tmp.q;
> > -}
> >