make fls() and ffs() consistent across architectures

Their parameter types differed between ARM and x86. Along with
generalizing the functions this fixes
- x86's non-long functions having long parameter types
- ARM's ffs() using a long intermediate variable
- generic_fls64() being broken when the upper half of the input is
  non-zero
- common (and in one case also ARM) code using fls() when flsl() was
  meant
Also drop ARM's constant_fls() in favor of the identical generic_fls().

Signed-off-by: Jan Beulich

--- unstable.orig/xen/common/page_alloc.c	2014-11-07 17:16:38.000000000 +0100
+++ unstable/xen/common/page_alloc.c	2015-01-21 08:53:14.000000000 +0100
@@ -278,7 +278,7 @@ unsigned long __init alloc_boot_pages(
 
 #define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 1 : ((b) - PAGE_SHIFT))
 #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN :  \
-                          (fls(page_to_mfn(pg)) ? : 1))
+                          (flsl(page_to_mfn(pg)) ? : 1))
 
 typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
@@ -1259,7 +1259,7 @@ void __init end_boot_allocator(void)
     {
 #ifdef CONFIG_X86
         dma_bitsize = min_t(unsigned int,
-                            fls(NODE_DATA(0)->node_spanned_pages) - 1
+                            flsl(NODE_DATA(0)->node_spanned_pages) - 1
                             + PAGE_SHIFT - 2,
                             32);
 #else
@@ -1544,7 +1544,7 @@ static unsigned int __read_mostly xenhea
 
 void __init xenheap_max_mfn(unsigned long mfn)
 {
-    xenheap_bits = fls(mfn) + PAGE_SHIFT;
+    xenheap_bits = flsl(mfn) + PAGE_SHIFT;
 }
 
 void init_xenheap_pages(paddr_t ps, paddr_t pe)
--- unstable.orig/xen/common/xmalloc_tlsf.c	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/common/xmalloc_tlsf.c	2015-01-21 08:53:05.000000000 +0100
@@ -138,9 +138,9 @@ static inline void MAPPING_SEARCH(unsign
     }
     else
     {
-        t = (1 << (fls(*r) - 1 - MAX_LOG2_SLI)) - 1;
+        t = (1 << (flsl(*r) - 1 - MAX_LOG2_SLI)) - 1;
         *r = *r + t;
-        *fl = fls(*r) - 1;
+        *fl = flsl(*r) - 1;
         *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
         *fl -= FLI_OFFSET;
         /*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0!
@@ -164,7 +164,7 @@ static inline void MAPPING_INSERT(unsign
     }
     else
     {
-        *fl = fls(r) - 1;
+        *fl = flsl(r) - 1;
         *sl = (r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
         *fl -= FLI_OFFSET;
     }
--- unstable.orig/xen/include/asm-arm/arm32/bitops.h	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/include/asm-arm/arm32/bitops.h	2015-01-21 10:06:01.000000000 +0100
@@ -15,6 +15,8 @@ extern int _test_and_change_bit(int nr,
 #define test_and_clear_bit(n,p)  _test_and_clear_bit(n,p)
 #define test_and_change_bit(n,p) _test_and_change_bit(n,p)
 
+#define flsl fls
+
 /*
  * Little endian assembly bitops.  nr = 0 -> byte 0 bit 0.
  */
--- unstable.orig/xen/include/asm-arm/arm64/bitops.h	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/include/asm-arm/arm64/bitops.h	2015-01-21 08:28:43.000000000 +0100
@@ -32,6 +32,17 @@ static /*__*/always_inline unsigned long
  */
 #define ffz(x)  __ffs(~(x))
 
+static inline int flsl(unsigned long x)
+{
+        int ret;
+
+        if (__builtin_constant_p(x))
+                return generic_flsl(x);
+
+        asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
+        return BITS_PER_LONG - ret;
+}
+
 /* Based on linux/include/asm-generic/bitops/find.h */
 
 #ifndef find_next_bit
--- unstable.orig/xen/include/asm-arm/bitops.h	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/include/asm-arm/bitops.h	2015-01-22 13:30:17.000000000 +0100
@@ -99,46 +99,17 @@ static inline int test_bit(int nr, const
         return 1UL & (p[BIT_WORD(nr)] >> (nr & (BITS_PER_WORD-1)));
 }
 
-static inline int constant_fls(int x)
-{
-        int r = 32;
-
-        if (!x)
-                return 0;
-        if (!(x & 0xffff0000u)) {
-                x <<= 16;
-                r -= 16;
-        }
-        if (!(x & 0xff000000u)) {
-                x <<= 8;
-                r -= 8;
-        }
-        if (!(x & 0xf0000000u)) {
-                x <<= 4;
-                r -= 4;
-        }
-        if (!(x & 0xc0000000u)) {
-                x <<= 2;
-                r -= 2;
-        }
-        if (!(x & 0x80000000u)) {
-                x <<= 1;
-                r -= 1;
-        }
-        return r;
-}
-
 /*
  * On ARMv5 and above those functions can be implemented around
  * the clz instruction for much better code efficiency.
  */
 
-static inline int fls(int x)
+static inline int fls(unsigned int x)
 {
         int ret;
 
         if (__builtin_constant_p(x))
-                return constant_fls(x);
+                return generic_fls(x);
 
         asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
         ret = BITS_PER_LONG - ret;
@@ -146,7 +117,8 @@ static inline int fls(int x)
 }
 
 
-#define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); })
+#define ffs(x) ({ unsigned int __t = (x); fls(__t & -__t); })
+#define ffsl(x) ({ unsigned long __t = (x); flsl(__t & -__t); })
 
 /**
  * find_first_set_bit - find the first set bit in @word
@@ -157,7 +129,7 @@ static inline int fls(int x)
  */
 static inline unsigned int find_first_set_bit(unsigned long word)
 {
-        return ffs(word) - 1;
+        return ffsl(word) - 1;
 }
 
 /**
--- unstable.orig/xen/include/asm-x86/bitops.h	2014-09-15 15:42:35.000000000 +0200
+++ unstable/xen/include/asm-x86/bitops.h	2015-01-22 13:30:02.000000000 +0100
@@ -401,7 +401,7 @@ static inline unsigned int find_first_se
  *
  * This is defined the same way as the libc and compiler builtin ffs routines.
  */
-static inline int ffs(unsigned long x)
+static inline int ffsl(unsigned long x)
 {
     long r;
 
@@ -412,13 +412,24 @@ static inline int ffs(unsigned long x)
     return (int)r+1;
 }
 
+static inline int ffs(unsigned int x)
+{
+    int r;
+
+    asm ( "bsf %1,%0\n\t"
+          "jnz 1f\n\t"
+          "mov $-1,%0\n"
+          "1:" : "=r" (r) : "rm" (x));
+    return r + 1;
+}
+
 /**
  * fls - find last bit set
  * @x: the word to search
  *
  * This is defined the same way as ffs.
  */
-static inline int fls(unsigned long x)
+static inline int flsl(unsigned long x)
 {
     long r;
 
@@ -429,8 +440,16 @@ static inline int fls(unsigned long x)
     return (int)r+1;
 }
 
-#define ffs64 ffs
-#define fls64 fls
+static inline int fls(unsigned int x)
+{
+    int r;
+
+    asm ( "bsr %1,%0\n\t"
+          "jnz 1f\n\t"
+          "mov $-1,%0\n"
+          "1:" : "=r" (r) : "rm" (x));
+    return r + 1;
+}
 
 /**
  * hweightN - returns the hamming weight of a N-bit word
--- unstable.orig/xen/include/xen/bitops.h	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/include/xen/bitops.h	2015-01-22 13:30:46.000000000 +0100
@@ -70,20 +70,52 @@ static __inline__ int generic_fls(int x)
     return r;
 }
 
+#if BITS_PER_LONG == 64
+
+static inline int generic_ffsl(unsigned long x)
+{
+    return !x || (u32)x ? generic_ffs(x) : generic_ffs(x >> 32) + 32;
+}
+
+static inline int generic_flsl(unsigned long x)
+{
+    u32 h = x >> 32;
+
+    return h ? generic_fls(h) + 32 : generic_fls(x);
+}
+
+#else
+# define generic_ffsl generic_ffs
+# define generic_flsl generic_fls
+#endif
+
 /*
  * Include this here because some architectures need generic_ffs/fls in
  * scope
  */
 #include <asm/bitops.h>
 
-
+#if BITS_PER_LONG == 64
+# define fls64 flsl
+# define ffs64 ffsl
+#else
+# ifndef ffs64
+static inline int generic_ffs64(__u64 x)
+{
+    return !x || (__u32)x ? ffs(x) : ffs(x >> 32) + 32;
+}
+# define ffs64 generic_ffs64
+# endif
+# ifndef fls64
 static inline int generic_fls64(__u64 x)
 {
     __u32 h = x >> 32;
-    if (h)
-        return fls(x) + 32;
-    return fls(x);
+
+    return h ? fls(h) + 32 : fls(x);
 }
+# define fls64 generic_fls64
+# endif
+#endif
 
 static __inline__ int get_bitmask_order(unsigned int count)
 {
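
For reference, and not part of the patch: a minimal, self-contained sketch of
the semantics the patch settles on, relying only on gcc/clang builtins.
fls()/flsl() return the 1-based index of the most significant set bit (0 for
a zero input), ffs()/ffsl() likewise for the least significant one, and any
64-bit fold has to consult the upper half first -- the case the old
generic_fls64() mishandled. The test_* names and the values checked are made
up for illustration; this is not the Xen implementation.

#include <assert.h>
#include <stdint.h>

/* 1-based index of the most significant set bit; 0 for x == 0
 * (__builtin_clz() is undefined for a zero argument, hence the guard). */
static int test_fls(uint32_t x)
{
    return x ? 32 - __builtin_clz(x) : 0;
}

/* 64-bit fold: consult the upper half first, as the fixed
 * generic_flsl()/generic_fls64() do. */
static int test_flsl(uint64_t x)
{
    uint32_t h = x >> 32;

    return h ? test_fls(h) + 32 : test_fls((uint32_t)x);
}

/* x & -x isolates the lowest set bit, turning ffs into fls. */
static int test_ffsl(uint64_t x)
{
    return test_flsl(x & -x);
}

int main(void)
{
    assert(test_fls(0) == 0);
    assert(test_fls(1) == 1);
    assert(test_fls(0x80000000u) == 32);
    /* Upper half non-zero: the case the old generic_fls64() got wrong. */
    assert(test_flsl(1ULL << 32) == 33);
    assert(test_flsl(~0ULL) == 64);
    assert(test_ffsl(0x300ULL << 32) == 41);
    return 0;
}

Swapping test_fls(h) + 32 for test_fls((uint32_t)x) + 32 in test_flsl()
reproduces the old generic_fls64() bug: the 1ULL << 32 assertion then fails.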