make fls() and ffs() consistent across architectures

Their parameter types differed between ARM and x86. Along with
generalizing the functions this fixes
- x86's non-long functions having long parameter types
- ARM's ffs() using a long intermediate variable
- generic_fls64() being broken when the upper half of the input is
  non-zero
- common (and in one case also ARM) code using fls() when flsl() was
  meant
Also drop ARM's constant_fls() in favor of the identical generic_fls().

Signed-off-by: Jan Beulich

--- unstable.orig/xen/common/page_alloc.c	2014-11-07 17:16:38.000000000 +0100
+++ unstable/xen/common/page_alloc.c	2015-01-21 08:53:14.000000000 +0100
@@ -278,7 +278,7 @@ unsigned long __init alloc_boot_pages(
 
 #define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 1 : ((b) - PAGE_SHIFT))
 #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN :  \
-                          (fls(page_to_mfn(pg)) ? : 1))
+                          (flsl(page_to_mfn(pg)) ? : 1))
 
 typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
@@ -1259,7 +1259,7 @@ void __init end_boot_allocator(void)
     {
 #ifdef CONFIG_X86
         dma_bitsize = min_t(unsigned int,
-                            fls(NODE_DATA(0)->node_spanned_pages) - 1
+                            flsl(NODE_DATA(0)->node_spanned_pages) - 1
                             + PAGE_SHIFT - 2,
                             32);
 #else
@@ -1544,7 +1544,7 @@ static unsigned int __read_mostly xenhea
 
 void __init xenheap_max_mfn(unsigned long mfn)
 {
-    xenheap_bits = fls(mfn) + PAGE_SHIFT;
+    xenheap_bits = flsl(mfn) + PAGE_SHIFT;
 }
 
 void init_xenheap_pages(paddr_t ps, paddr_t pe)
--- unstable.orig/xen/common/xmalloc_tlsf.c	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/common/xmalloc_tlsf.c	2015-01-21 08:53:05.000000000 +0100
@@ -138,9 +138,9 @@ static inline void MAPPING_SEARCH(unsign
     }
     else
     {
-        t = (1 << (fls(*r) - 1 - MAX_LOG2_SLI)) - 1;
+        t = (1 << (flsl(*r) - 1 - MAX_LOG2_SLI)) - 1;
         *r = *r + t;
-        *fl = fls(*r) - 1;
+        *fl = flsl(*r) - 1;
         *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
         *fl -= FLI_OFFSET;
         /*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0!
@@ -164,7 +164,7 @@ static inline void MAPPING_INSERT(unsign
     }
     else
     {
-        *fl = fls(r) - 1;
+        *fl = flsl(r) - 1;
         *sl = (r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
         *fl -= FLI_OFFSET;
     }
--- unstable.orig/xen/include/asm-arm/arm32/bitops.h	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/include/asm-arm/arm32/bitops.h	2015-01-21 10:06:01.000000000 +0100
@@ -15,6 +15,8 @@ extern int _test_and_change_bit(int nr,
 #define test_and_clear_bit(n,p)  _test_and_clear_bit(n,p)
 #define test_and_change_bit(n,p) _test_and_change_bit(n,p)
 
+#define flsl fls
+
 /*
  * Little endian assembly bitops.  nr = 0 -> byte 0 bit 0.
  */
--- unstable.orig/xen/include/asm-arm/arm64/bitops.h	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/include/asm-arm/arm64/bitops.h	2015-01-21 08:28:43.000000000 +0100
@@ -32,6 +32,17 @@ static /*__*/always_inline unsigned long
  */
 #define ffz(x)  __ffs(~(x))
 
+static inline int flsl(unsigned long x)
+{
+        int ret;
+
+        if (__builtin_constant_p(x))
+                return generic_flsl(x);
+
+        asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
+        return BITS_PER_LONG - ret;
+}
+
 /* Based on linux/include/asm-generic/bitops/find.h */
 
 #ifndef find_next_bit
--- unstable.orig/xen/include/asm-arm/bitops.h	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/include/asm-arm/bitops.h	2015-01-22 13:30:17.000000000 +0100
@@ -99,46 +99,17 @@ static inline int test_bit(int nr, const
         return 1UL & (p[BIT_WORD(nr)] >> (nr & (BITS_PER_WORD-1)));
 }
 
-static inline int constant_fls(int x)
-{
-        int r = 32;
-
-        if (!x)
-                return 0;
-        if (!(x & 0xffff0000u)) {
-                x <<= 16;
-                r -= 16;
-        }
-        if (!(x & 0xff000000u)) {
-                x <<= 8;
-                r -= 8;
-        }
-        if (!(x & 0xf0000000u)) {
-                x <<= 4;
-                r -= 4;
-        }
-        if (!(x & 0xc0000000u)) {
-                x <<= 2;
-                r -= 2;
-        }
-        if (!(x & 0x80000000u)) {
-                x <<= 1;
-                r -= 1;
-        }
-        return r;
-}
-
 /*
  * On ARMv5 and above those functions can be implemented around
  * the clz instruction for much better code efficiency.
  */
 
-static inline int fls(int x)
+static inline int fls(unsigned int x)
 {
         int ret;
 
         if (__builtin_constant_p(x))
-                return constant_fls(x);
+                return generic_fls(x);
 
         asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
         ret = BITS_PER_LONG - ret;
@@ -146,7 +117,8 @@ static inline int fls(int x)
 }
 
 
-#define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); })
+#define ffs(x) ({ unsigned int __t = (x); fls(__t & -__t); })
+#define ffsl(x) ({ unsigned long __t = (x); flsl(__t & -__t); })
 
 /**
  * find_first_set_bit - find the first set bit in @word
@@ -157,7 +129,7 @@ static inline int fls(int x)
  */
 static inline unsigned int find_first_set_bit(unsigned long word)
 {
-        return ffs(word) - 1;
+        return ffsl(word) - 1;
 }
 
 /**
--- unstable.orig/xen/include/asm-x86/bitops.h	2014-09-15 15:42:35.000000000 +0200
+++ unstable/xen/include/asm-x86/bitops.h	2015-01-22 13:30:02.000000000 +0100
@@ -401,7 +401,7 @@ static inline unsigned int find_first_se
  *
  * This is defined the same way as the libc and compiler builtin ffs routines.
  */
-static inline int ffs(unsigned long x)
+static inline int ffsl(unsigned long x)
 {
     long r;
 
@@ -412,13 +412,24 @@ static inline int ffs(unsigned long x)
     return (int)r+1;
 }
 
+static inline int ffs(unsigned int x)
+{
+    int r;
+
+    asm ( "bsf %1,%0\n\t"
+          "jnz 1f\n\t"
+          "mov $-1,%0\n"
+          "1:" : "=r" (r) : "rm" (x));
+    return r + 1;
+}
+
 /**
  * fls - find last bit set
  * @x: the word to search
  *
  * This is defined the same way as ffs.
  */
-static inline int fls(unsigned long x)
+static inline int flsl(unsigned long x)
 {
     long r;
 
@@ -429,8 +440,16 @@ static inline int fls(unsigned long x)
     return (int)r+1;
 }
 
-#define ffs64 ffs
-#define fls64 fls
+static inline int fls(unsigned int x)
+{
+    int r;
+
+    asm ( "bsr %1,%0\n\t"
+          "jnz 1f\n\t"
+          "mov $-1,%0\n"
+          "1:" : "=r" (r) : "rm" (x));
+    return r + 1;
+}
 
 /**
  * hweightN - returns the hamming weight of a N-bit word
--- unstable.orig/xen/include/xen/bitops.h	2015-01-22 13:28:41.000000000 +0100
+++ unstable/xen/include/xen/bitops.h	2015-01-22 13:30:46.000000000 +0100
@@ -70,20 +70,52 @@ static __inline__ int generic_fls(int x)
     return r;
 }
 
+#if BITS_PER_LONG == 64
+
+static inline int generic_ffsl(unsigned long x)
+{
+    return !x || (u32)x ? generic_ffs(x) : generic_ffs(x >> 32) + 32;
+}
+
+static inline int generic_flsl(unsigned long x)
+{
+    u32 h = x >> 32;
+
+    return h ? generic_fls(h) + 32 : generic_fls(x);
+}
+
+#else
+# define generic_ffsl generic_ffs
+# define generic_flsl generic_fls
+#endif
+
 /*
  * Include this here because some architectures need generic_ffs/fls in
  * scope
  */
 #include <asm/bitops.h>
 
-
+#if BITS_PER_LONG == 64
+# define fls64 flsl
+# define ffs64 ffsl
+#else
+# ifndef ffs64
+static inline int generic_ffs64(__u64 x)
+{
+    return !x || (__u32)x ? ffs(x) : ffs(x >> 32) + 32;
+}
+# define ffs64 generic_ffs64
+# endif
+# ifndef fls64
 static inline int generic_fls64(__u64 x)
 {
     __u32 h = x >> 32;
-    if (h)
-        return fls(x) + 32;
-    return fls(x);
+
+    return h ? fls(h) + 32 : fls(x);
 }
+# define fls64 generic_fls64
+# endif
+#endif
 
 static __inline__ int get_bitmask_order(unsigned int count)
 {
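
For reference, and not part of the patch: a minimal, self-contained sketch of
the semantics the patch settles on, relying only on gcc/clang builtins.
fls()/flsl() return the 1-based index of the most significant set bit (0 for
a zero input), ffs()/ffsl() likewise for the least significant one, and any
64-bit fold has to consult the upper half first -- the case the old
generic_fls64() mishandled. The test_* names and the values checked are made
up for illustration; this is not the Xen implementation.

#include <assert.h>
#include <stdint.h>

/* 1-based index of the most significant set bit; 0 for x == 0
 * (__builtin_clz() is undefined for a zero argument, hence the guard). */
static int test_fls(uint32_t x)
{
    return x ? 32 - __builtin_clz(x) : 0;
}

/* 64-bit fold: consult the upper half first, as the fixed
 * generic_flsl()/generic_fls64() do. */
static int test_flsl(uint64_t x)
{
    uint32_t h = x >> 32;

    return h ? test_fls(h) + 32 : test_fls((uint32_t)x);
}

/* x & -x isolates the lowest set bit, turning ffs into fls. */
static int test_ffsl(uint64_t x)
{
    return test_flsl(x & -x);
}

int main(void)
{
    assert(test_fls(0) == 0);
    assert(test_fls(1) == 1);
    assert(test_fls(0x80000000u) == 32);
    /* Upper half non-zero: the case the old generic_fls64() got wrong. */
    assert(test_flsl(1ULL << 32) == 33);
    assert(test_flsl(~0ULL) == 64);
    assert(test_ffsl(0x300ULL << 32) == 41);
    return 0;
}

Swapping test_fls(h) + 32 for test_fls((uint32_t)x) + 32 in test_flsl()
reproduces the old generic_fls64() bug: the 1ULL << 32 assertion then fails.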