[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [xen master] lib: move 64-bit div/mod compiler helpers
commit 849db4945b8b166085751681ac8b4f5b7cefece0 Author: Jan Beulich <jbeulich@xxxxxxxx> AuthorDate: Fri Apr 16 14:43:10 2021 +0200 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Fri Apr 16 14:43:10 2021 +0200 lib: move 64-bit div/mod compiler helpers These were built for 32-bit architectures only (the same code could, with some tweaking, sensibly be used to provide TI-mode helpers on 64-bit arch-es) - retain this property, while still avoiding to have a CU without any contents at all. For this, Arm's CONFIG_64BIT gets generalized. Note that we imply "32-bit arch" to be the same as BITS_PER_LONG == 32, i.e. we aren't (not just here) prepared to have a 64-bit arch with BITS_PER_LONG == 32. Yet even if we supported such, likely the compiler would get away there without invoking these helpers, so the code would remain unused in practice. Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx> Acked-by: Julien Grall <jgrall@xxxxxxxxxx> --- xen/arch/Kconfig | 2 + xen/arch/arm/Kconfig | 12 +- xen/arch/x86/Kconfig | 1 + xen/common/Makefile | 1 - xen/common/lib.c | 404 --------------------------------------------------- xen/lib/Makefile | 4 + xen/lib/divmod.c | 402 ++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 412 insertions(+), 414 deletions(-) diff --git a/xen/arch/Kconfig b/xen/arch/Kconfig index d144d4c8d3..f16eb0df43 100644 --- a/xen/arch/Kconfig +++ b/xen/arch/Kconfig @@ -1,3 +1,5 @@ +config 64BIT + bool config NR_CPUS int "Maximum number of CPUs" diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig index 330bbf6232..ecfa6822e4 100644 --- a/xen/arch/arm/Kconfig +++ b/xen/arch/arm/Kconfig @@ -1,17 +1,11 @@ -config 64BIT - bool - default "$(ARCH)" != "arm32" - help - Say yes to build a 64-bit Xen - Say no to build a 32-bit Xen - config ARM_32 def_bool y - depends on !64BIT + depends on "$(ARCH)" = "arm32" config ARM_64 def_bool y - depends on 64BIT + depends on !ARM_32 + select 64BIT select HAS_FAST_MULTIPLY config ARM diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig index cb401fa2e5..57776d5106 100644 --- a/xen/arch/x86/Kconfig +++ b/xen/arch/x86/Kconfig @@ -1,5 +1,6 @@ config X86_64 def_bool y + select 64BIT config X86 def_bool y diff --git a/xen/common/Makefile b/xen/common/Makefile index 71c1d466bd..e2a7e62d14 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -21,7 +21,6 @@ obj-y += kernel.o obj-y += keyhandler.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC) += kimage.o -obj-y += lib.o obj-$(CONFIG_LIVEPATCH) += livepatch.o livepatch_elf.o obj-$(CONFIG_MEM_ACCESS) += mem_access.o obj-y += memory.o diff --git a/xen/common/lib.c b/xen/common/lib.c deleted file mode 100644 index 5b8f49153d..0000000000 --- a/xen/common/lib.c +++ /dev/null @@ -1,404 +0,0 @@ -#include <xen/lib.h> -#include <xen/types.h> -#include <asm/byteorder.h> - -/* - * A couple of 64 bit operations ported from FreeBSD. - * The code within the '#if BITS_PER_LONG == 32' block below, and no other - * code in this file, is distributed under the following licensing terms - * This is the modified '3-clause' BSD license with the obnoxious - * advertising clause removed, as permitted by University of California. - * - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#if BITS_PER_LONG == 32 - -/* - * Depending on the desired operation, we view a `long long' (aka quad_t) in - * one or more of the following formats. - */ -union uu { - s64 q; /* as a (signed) quad */ - s64 uq; /* as an unsigned quad */ - long sl[2]; /* as two signed longs */ - unsigned long ul[2]; /* as two unsigned longs */ -}; - -#ifdef __BIG_ENDIAN -#define _QUAD_HIGHWORD 0 -#define _QUAD_LOWWORD 1 -#else /* __LITTLE_ENDIAN */ -#define _QUAD_HIGHWORD 1 -#define _QUAD_LOWWORD 0 -#endif - -/* - * Define high and low longwords. - */ -#define H _QUAD_HIGHWORD -#define L _QUAD_LOWWORD - -/* - * Total number of bits in a quad_t and in the pieces that make it up. - * These are used for shifting, and also below for halfword extraction - * and assembly. - */ -#define CHAR_BIT 8 /* number of bits in a char */ -#define QUAD_BITS (sizeof(s64) * CHAR_BIT) -#define LONG_BITS (sizeof(long) * CHAR_BIT) -#define HALF_BITS (sizeof(long) * CHAR_BIT / 2) - -/* - * Extract high and low shortwords from longword, and move low shortword of - * longword to upper half of long, i.e., produce the upper longword of - * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be - * unsigned long.) - * - * These are used in the multiply code, to split a longword into upper - * and lower halves, and to reassemble a product as a quad_t, shifted left - * (sizeof(long)*CHAR_BIT/2). - */ -#define HHALF(x) ((x) >> HALF_BITS) -#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1)) -#define LHUP(x) ((x) << HALF_BITS) - -/* - * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), - * section 4.3.1, pp. 257--259. - */ -#define B (1 << HALF_BITS) /* digit base */ - -/* Combine two `digits' to make a single two-digit number. */ -#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b)) - -/* select a type for digits in base B */ -typedef unsigned long digit; - -/* - * Shift p[0]..p[len] left `sh' bits, ignoring any bits that - * `fall out' the left (there never will be any such anyway). - * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. - */ -static void shl(register digit *p, register int len, register int sh) -{ - register int i; - - for (i = 0; i < len; i++) - p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); - p[i] = LHALF(p[i] << sh); -} - -/* - * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. - * - * We do this in base 2-sup-HALF_BITS, so that all intermediate products - * fit within unsigned long. As a consequence, the maximum length dividend - * and divisor are 4 `digits' in this base (they are shorter if they have - * leading zeros). - */ -u64 __qdivrem(u64 uq, u64 vq, u64 *arq) -{ - union uu tmp; - digit *u, *v, *q; - register digit v1, v2; - unsigned long qhat, rhat, t; - int m, n, d, j, i; - digit uspace[5], vspace[5], qspace[5]; - - /* - * Take care of special cases: divide by zero, and u < v. - */ - if (vq == 0) { - /* divide by zero. */ - static volatile const unsigned int zero = 0; - - tmp.ul[H] = tmp.ul[L] = 1 / zero; - if (arq) - *arq = uq; - return (tmp.q); - } - if (uq < vq) { - if (arq) - *arq = uq; - return (0); - } - u = &uspace[0]; - v = &vspace[0]; - q = &qspace[0]; - - /* - * Break dividend and divisor into digits in base B, then - * count leading zeros to determine m and n. When done, we - * will have: - * u = (u[1]u[2]...u[m+n]) sub B - * v = (v[1]v[2]...v[n]) sub B - * v[1] != 0 - * 1 < n <= 4 (if n = 1, we use a different division algorithm) - * m >= 0 (otherwise u < v, which we already checked) - * m + n = 4 - * and thus - * m = 4 - n <= 2 - */ - tmp.uq = uq; - u[0] = 0; - u[1] = HHALF(tmp.ul[H]); - u[2] = LHALF(tmp.ul[H]); - u[3] = HHALF(tmp.ul[L]); - u[4] = LHALF(tmp.ul[L]); - tmp.uq = vq; - v[1] = HHALF(tmp.ul[H]); - v[2] = LHALF(tmp.ul[H]); - v[3] = HHALF(tmp.ul[L]); - v[4] = LHALF(tmp.ul[L]); - for (n = 4; v[1] == 0; v++) { - if (--n == 1) { - unsigned long rbj; /* r*B+u[j] (not root boy jim) */ - digit q1, q2, q3, q4; - - /* - * Change of plan, per exercise 16. - * r = 0; - * for j = 1..4: - * q[j] = floor((r*B + u[j]) / v), - * r = (r*B + u[j]) % v; - * We unroll this completely here. - */ - t = v[2]; /* nonzero, by definition */ - q1 = u[1] / t; - rbj = COMBINE(u[1] % t, u[2]); - q2 = rbj / t; - rbj = COMBINE(rbj % t, u[3]); - q3 = rbj / t; - rbj = COMBINE(rbj % t, u[4]); - q4 = rbj / t; - if (arq) - *arq = rbj % t; - tmp.ul[H] = COMBINE(q1, q2); - tmp.ul[L] = COMBINE(q3, q4); - return (tmp.q); - } - } - - /* - * By adjusting q once we determine m, we can guarantee that - * there is a complete four-digit quotient at &qspace[1] when - * we finally stop. - */ - for (m = 4 - n; u[1] == 0; u++) - m--; - for (i = 4 - m; --i >= 0;) - q[i] = 0; - q += 4 - m; - - /* - * Here we run Program D, translated from MIX to C and acquiring - * a few minor changes. - * - * D1: choose multiplier 1 << d to ensure v[1] >= B/2. - */ - d = 0; - for (t = v[1]; t < B / 2; t <<= 1) - d++; - if (d > 0) { - shl(&u[0], m + n, d); /* u <<= d */ - shl(&v[1], n - 1, d); /* v <<= d */ - } - /* - * D2: j = 0. - */ - j = 0; - v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ - v2 = v[2]; /* for D3 */ - do { - register digit uj0, uj1, uj2; - - /* - * D3: Calculate qhat (\^q, in TeX notation). - * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and - * let rhat = (u[j]*B + u[j+1]) mod v[1]. - * While rhat < B and v[2]*qhat > rhat*B+u[j+2], - * decrement qhat and increase rhat correspondingly. - * Note that if rhat >= B, v[2]*qhat < rhat*B. - */ - uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ - uj1 = u[j + 1]; /* for D3 only */ - uj2 = u[j + 2]; /* for D3 only */ - if (uj0 == v1) { - qhat = B; - rhat = uj1; - goto qhat_too_big; - } else { - unsigned long nn = COMBINE(uj0, uj1); - - qhat = nn / v1; - rhat = nn % v1; - } - while (v2 * qhat > COMBINE(rhat, uj2)) { - qhat_too_big: - qhat--; - if ((rhat += v1) >= B) - break; - } - /* - * D4: Multiply and subtract. - * The variable `t' holds any borrows across the loop. - * We split this up so that we do not require v[0] = 0, - * and to eliminate a final special case. - */ - for (t = 0, i = n; i > 0; i--) { - t = u[i + j] - v[i] * qhat - t; - u[i + j] = LHALF(t); - t = (B - HHALF(t)) & (B - 1); - } - t = u[j] - t; - u[j] = LHALF(t); - /* - * D5: test remainder. - * There is a borrow if and only if HHALF(t) is nonzero; - * in that (rare) case, qhat was too large (by exactly 1). - * Fix it by adding v[1..n] to u[j..j+n]. - */ - if (HHALF(t)) { - qhat--; - for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ - t += u[i + j] + v[i]; - u[i + j] = LHALF(t); - t = HHALF(t); - } - u[j] = LHALF(u[j] + t); - } - q[j] = qhat; - } while (++j <= m); /* D7: loop on j. */ - - /* - * If caller wants the remainder, we have to calculate it as - * u[m..m+n] >> d (this is at most n digits and thus fits in - * u[m+1..m+n], but we may need more source digits). - */ - if (arq) { - if (d) { - for (i = m + n; i > m; --i) - u[i] = (u[i] >> d) | - LHALF(u[i - 1] << (HALF_BITS - d)); - u[i] = 0; - } - tmp.ul[H] = COMBINE(uspace[1], uspace[2]); - tmp.ul[L] = COMBINE(uspace[3], uspace[4]); - *arq = tmp.q; - } - - tmp.ul[H] = COMBINE(qspace[1], qspace[2]); - tmp.ul[L] = COMBINE(qspace[3], qspace[4]); - return (tmp.q); -} - -/* - * Divide two signed quads. - * Truncates towards zero, as required by C99. - */ -s64 __divdi3(s64 a, s64 b) -{ - u64 ua, ub, uq; - int neg = (a < 0) ^ (b < 0); - ua = (a < 0) ? -(u64)a : a; - ub = (b < 0) ? -(u64)b : b; - uq = __qdivrem(ua, ub, (u64 *)0); - return (neg ? -uq : uq); -} - - -/* - * Divide two unsigned quads. - */ -u64 __udivdi3(u64 a, u64 b) -{ - return __qdivrem(a, b, (u64 *)0); -} - -/* - * Remainder of unsigned quad division - */ -u64 __umoddi3(u64 a, u64 b) -{ - u64 rem; - __qdivrem(a, b, &rem); - return rem; -} - -/* - * Remainder of signed quad division. - * Truncates towards zero, as required by C99: - * 11 % 5 = 1 - * -11 % 5 = -1 - * 11 % -5 = 1 - * -11 % -5 = -1 - */ -s64 __moddi3(s64 a, s64 b) -{ - u64 ua, ub, urem; - int neg = (a < 0); - ua = neg ? -(u64)a : a; - ub = (b < 0) ? -(u64)b : b; - __qdivrem(ua, ub, &urem); - return (neg ? -urem : urem); -} - -/* - * Quotient and remainder of unsigned long long division - */ -s64 __ldivmod_helper(s64 a, s64 b, s64 *r) -{ - u64 ua, ub, rem, quot; - - ua = ABS(a); - ub = ABS(b); - quot = __qdivrem(ua, ub, &rem); - if ( a < 0 ) - *r = -rem; - else - *r = rem; - if ( (a < 0) ^ (b < 0) ) - return -quot; - else - return quot; -} -#endif /* BITS_PER_LONG == 32 */ - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/lib/Makefile b/xen/lib/Makefile index 0b274583ef..a5dc1442a4 100644 --- a/xen/lib/Makefile +++ b/xen/lib/Makefile @@ -10,3 +10,7 @@ lib-y += rbtree.o lib-y += sort.o lib-$(CONFIG_X86) += xxhash32.o lib-$(CONFIG_X86) += xxhash64.o + +lib32-y := divmod.o +lib32-$(CONFIG_64BIT) := +lib-y += $(lib32-y) diff --git a/xen/lib/divmod.c b/xen/lib/divmod.c new file mode 100644 index 0000000000..0be6ccc700 --- /dev/null +++ b/xen/lib/divmod.c @@ -0,0 +1,402 @@ +#include <xen/lib.h> +#include <xen/types.h> +#include <asm/byteorder.h> + +/* + * A couple of 64 bit operations ported from FreeBSD. + * The code within the '#if BITS_PER_LONG == 32' block below, and no other + * code in this file, is distributed under the following licensing terms + * This is the modified '3-clause' BSD license with the obnoxious + * advertising clause removed, as permitted by University of California. + * + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Depending on the desired operation, we view a `long long' (aka quad_t) in + * one or more of the following formats. + */ +union uu { + s64 q; /* as a (signed) quad */ + s64 uq; /* as an unsigned quad */ + long sl[2]; /* as two signed longs */ + unsigned long ul[2]; /* as two unsigned longs */ +}; + +#ifdef __BIG_ENDIAN +#define _QUAD_HIGHWORD 0 +#define _QUAD_LOWWORD 1 +#else /* __LITTLE_ENDIAN */ +#define _QUAD_HIGHWORD 1 +#define _QUAD_LOWWORD 0 +#endif + +/* + * Define high and low longwords. + */ +#define H _QUAD_HIGHWORD +#define L _QUAD_LOWWORD + +/* + * Total number of bits in a quad_t and in the pieces that make it up. + * These are used for shifting, and also below for halfword extraction + * and assembly. + */ +#define CHAR_BIT 8 /* number of bits in a char */ +#define QUAD_BITS (sizeof(s64) * CHAR_BIT) +#define LONG_BITS (sizeof(long) * CHAR_BIT) +#define HALF_BITS (sizeof(long) * CHAR_BIT / 2) + +/* + * Extract high and low shortwords from longword, and move low shortword of + * longword to upper half of long, i.e., produce the upper longword of + * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be + * unsigned long.) + * + * These are used in the multiply code, to split a longword into upper + * and lower halves, and to reassemble a product as a quad_t, shifted left + * (sizeof(long)*CHAR_BIT/2). + */ +#define HHALF(x) ((x) >> HALF_BITS) +#define LHALF(x) ((x) & ((1 << HALF_BITS) - 1)) +#define LHUP(x) ((x) << HALF_BITS) + +/* + * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), + * section 4.3.1, pp. 257--259. + */ +#define B (1 << HALF_BITS) /* digit base */ + +/* Combine two `digits' to make a single two-digit number. */ +#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b)) + +/* select a type for digits in base B */ +typedef unsigned long digit; + +/* + * Shift p[0]..p[len] left `sh' bits, ignoring any bits that + * `fall out' the left (there never will be any such anyway). + * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. + */ +static void shl(register digit *p, register int len, register int sh) +{ + register int i; + + for (i = 0; i < len; i++) + p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); + p[i] = LHALF(p[i] << sh); +} + +/* + * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. + * + * We do this in base 2-sup-HALF_BITS, so that all intermediate products + * fit within unsigned long. As a consequence, the maximum length dividend + * and divisor are 4 `digits' in this base (they are shorter if they have + * leading zeros). + */ +u64 __qdivrem(u64 uq, u64 vq, u64 *arq) +{ + union uu tmp; + digit *u, *v, *q; + register digit v1, v2; + unsigned long qhat, rhat, t; + int m, n, d, j, i; + digit uspace[5], vspace[5], qspace[5]; + + /* + * Take care of special cases: divide by zero, and u < v. + */ + if (vq == 0) { + /* divide by zero. */ + static volatile const unsigned int zero = 0; + + tmp.ul[H] = tmp.ul[L] = 1 / zero; + if (arq) + *arq = uq; + return (tmp.q); + } + if (uq < vq) { + if (arq) + *arq = uq; + return (0); + } + u = &uspace[0]; + v = &vspace[0]; + q = &qspace[0]; + + /* + * Break dividend and divisor into digits in base B, then + * count leading zeros to determine m and n. When done, we + * will have: + * u = (u[1]u[2]...u[m+n]) sub B + * v = (v[1]v[2]...v[n]) sub B + * v[1] != 0 + * 1 < n <= 4 (if n = 1, we use a different division algorithm) + * m >= 0 (otherwise u < v, which we already checked) + * m + n = 4 + * and thus + * m = 4 - n <= 2 + */ + tmp.uq = uq; + u[0] = 0; + u[1] = HHALF(tmp.ul[H]); + u[2] = LHALF(tmp.ul[H]); + u[3] = HHALF(tmp.ul[L]); + u[4] = LHALF(tmp.ul[L]); + tmp.uq = vq; + v[1] = HHALF(tmp.ul[H]); + v[2] = LHALF(tmp.ul[H]); + v[3] = HHALF(tmp.ul[L]); + v[4] = LHALF(tmp.ul[L]); + for (n = 4; v[1] == 0; v++) { + if (--n == 1) { + unsigned long rbj; /* r*B+u[j] (not root boy jim) */ + digit q1, q2, q3, q4; + + /* + * Change of plan, per exercise 16. + * r = 0; + * for j = 1..4: + * q[j] = floor((r*B + u[j]) / v), + * r = (r*B + u[j]) % v; + * We unroll this completely here. + */ + t = v[2]; /* nonzero, by definition */ + q1 = u[1] / t; + rbj = COMBINE(u[1] % t, u[2]); + q2 = rbj / t; + rbj = COMBINE(rbj % t, u[3]); + q3 = rbj / t; + rbj = COMBINE(rbj % t, u[4]); + q4 = rbj / t; + if (arq) + *arq = rbj % t; + tmp.ul[H] = COMBINE(q1, q2); + tmp.ul[L] = COMBINE(q3, q4); + return (tmp.q); + } + } + + /* + * By adjusting q once we determine m, we can guarantee that + * there is a complete four-digit quotient at &qspace[1] when + * we finally stop. + */ + for (m = 4 - n; u[1] == 0; u++) + m--; + for (i = 4 - m; --i >= 0;) + q[i] = 0; + q += 4 - m; + + /* + * Here we run Program D, translated from MIX to C and acquiring + * a few minor changes. + * + * D1: choose multiplier 1 << d to ensure v[1] >= B/2. + */ + d = 0; + for (t = v[1]; t < B / 2; t <<= 1) + d++; + if (d > 0) { + shl(&u[0], m + n, d); /* u <<= d */ + shl(&v[1], n - 1, d); /* v <<= d */ + } + /* + * D2: j = 0. + */ + j = 0; + v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ + v2 = v[2]; /* for D3 */ + do { + register digit uj0, uj1, uj2; + + /* + * D3: Calculate qhat (\^q, in TeX notation). + * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and + * let rhat = (u[j]*B + u[j+1]) mod v[1]. + * While rhat < B and v[2]*qhat > rhat*B+u[j+2], + * decrement qhat and increase rhat correspondingly. + * Note that if rhat >= B, v[2]*qhat < rhat*B. + */ + uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ + uj1 = u[j + 1]; /* for D3 only */ + uj2 = u[j + 2]; /* for D3 only */ + if (uj0 == v1) { + qhat = B; + rhat = uj1; + goto qhat_too_big; + } else { + unsigned long nn = COMBINE(uj0, uj1); + + qhat = nn / v1; + rhat = nn % v1; + } + while (v2 * qhat > COMBINE(rhat, uj2)) { + qhat_too_big: + qhat--; + if ((rhat += v1) >= B) + break; + } + /* + * D4: Multiply and subtract. + * The variable `t' holds any borrows across the loop. + * We split this up so that we do not require v[0] = 0, + * and to eliminate a final special case. + */ + for (t = 0, i = n; i > 0; i--) { + t = u[i + j] - v[i] * qhat - t; + u[i + j] = LHALF(t); + t = (B - HHALF(t)) & (B - 1); + } + t = u[j] - t; + u[j] = LHALF(t); + /* + * D5: test remainder. + * There is a borrow if and only if HHALF(t) is nonzero; + * in that (rare) case, qhat was too large (by exactly 1). + * Fix it by adding v[1..n] to u[j..j+n]. + */ + if (HHALF(t)) { + qhat--; + for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ + t += u[i + j] + v[i]; + u[i + j] = LHALF(t); + t = HHALF(t); + } + u[j] = LHALF(u[j] + t); + } + q[j] = qhat; + } while (++j <= m); /* D7: loop on j. */ + + /* + * If caller wants the remainder, we have to calculate it as + * u[m..m+n] >> d (this is at most n digits and thus fits in + * u[m+1..m+n], but we may need more source digits). + */ + if (arq) { + if (d) { + for (i = m + n; i > m; --i) + u[i] = (u[i] >> d) | + LHALF(u[i - 1] << (HALF_BITS - d)); + u[i] = 0; + } + tmp.ul[H] = COMBINE(uspace[1], uspace[2]); + tmp.ul[L] = COMBINE(uspace[3], uspace[4]); + *arq = tmp.q; + } + + tmp.ul[H] = COMBINE(qspace[1], qspace[2]); + tmp.ul[L] = COMBINE(qspace[3], qspace[4]); + return (tmp.q); +} + +/* + * Divide two signed quads. + * Truncates towards zero, as required by C99. + */ +s64 __divdi3(s64 a, s64 b) +{ + u64 ua, ub, uq; + int neg = (a < 0) ^ (b < 0); + ua = (a < 0) ? -(u64)a : a; + ub = (b < 0) ? -(u64)b : b; + uq = __qdivrem(ua, ub, (u64 *)0); + return (neg ? -uq : uq); +} + + +/* + * Divide two unsigned quads. + */ +u64 __udivdi3(u64 a, u64 b) +{ + return __qdivrem(a, b, (u64 *)0); +} + +/* + * Remainder of unsigned quad division + */ +u64 __umoddi3(u64 a, u64 b) +{ + u64 rem; + __qdivrem(a, b, &rem); + return rem; +} + +/* + * Remainder of signed quad division. + * Truncates towards zero, as required by C99: + * 11 % 5 = 1 + * -11 % 5 = -1 + * 11 % -5 = 1 + * -11 % -5 = -1 + */ +s64 __moddi3(s64 a, s64 b) +{ + u64 ua, ub, urem; + int neg = (a < 0); + ua = neg ? -(u64)a : a; + ub = (b < 0) ? -(u64)b : b; + __qdivrem(ua, ub, &urem); + return (neg ? -urem : urem); +} + +/* + * Quotient and remainder of unsigned long long division + */ +s64 __ldivmod_helper(s64 a, s64 b, s64 *r) +{ + u64 ua, ub, rem, quot; + + ua = ABS(a); + ub = ABS(b); + quot = __qdivrem(ua, ub, &rem); + if ( a < 0 ) + *r = -rem; + else + *r = rem; + if ( (a < 0) ^ (b < 0) ) + return -quot; + else + return quot; +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ -- generated by git-patchbot for /home/xen/git/xen.git#master
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |