[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] bitops: speed up hweight<N>()



commit aad7bdae14676f07d6e7fec70941c72078d01562
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Mon Jun 3 17:20:13 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Mon Jun 3 17:20:13 2019 +0200

    bitops: speed up hweight<N>()
    
    Algorithmically this gets us in line with current Linux, where the same
    change did happen about 13 years ago. See in particular Linux commits
    f9b4192923 ("bitops: hweight() speedup") and 0136611c62 ("optimize
    hweight64 for x86_64").
    
    Kconfig changes for actually setting HAS_FAST_MULTIPLY will follow.
    
    Take the opportunity and change generic_hweight64()'s return type to
    unsigned int.
    
    Suggested-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
 xen/common/Kconfig       |  3 +++
 xen/include/xen/bitops.h | 51 +++++++++++++++++++++++++++++-------------------
 2 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index 7a12346f19..10a759b31f 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -31,6 +31,9 @@ config HAS_DEVICE_TREE
 config HAS_EX_TABLE
        bool
 
+config HAS_FAST_MULTIPLY
+       bool
+
 config MEM_ACCESS_ALWAYS_ON
        bool
 
diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h
index a103e49089..b512800dd9 100644
--- a/xen/include/xen/bitops.h
+++ b/xen/include/xen/bitops.h
@@ -153,41 +153,52 @@ static __inline__ int get_count_order(unsigned int count)
 
 static inline unsigned int generic_hweight32(unsigned int w)
 {
-    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
-    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
-    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
-    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
-    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+    w -= (w >> 1) & 0x55555555;
+    w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
+    w =  (w + (w >> 4)) & 0x0f0f0f0f;
+
+    if ( IS_ENABLED(CONFIG_HAS_FAST_MULTIPLY) )
+        return (w * 0x01010101) >> 24;
+
+    w += w >> 8;
+
+    return (w + (w >> 16)) & 0xff;
 }
 
 static inline unsigned int generic_hweight16(unsigned int w)
 {
-    unsigned int res = (w & 0x5555) + ((w >> 1) & 0x5555);
-    res = (res & 0x3333) + ((res >> 2) & 0x3333);
-    res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F);
-    return (res & 0x00FF) + ((res >> 8) & 0x00FF);
+    w -= ((w >> 1) & 0x5555);
+    w =  (w & 0x3333) + ((w >> 2) & 0x3333);
+    w =  (w + (w >> 4)) & 0x0f0f;
+
+    return (w + (w >> 8)) & 0xff;
 }
 
 static inline unsigned int generic_hweight8(unsigned int w)
 {
-    unsigned int res = (w & 0x55) + ((w >> 1) & 0x55);
-    res = (res & 0x33) + ((res >> 2) & 0x33);
-    return (res & 0x0F) + ((res >> 4) & 0x0F);
+    w -= ((w >> 1) & 0x55);
+    w =  (w & 0x33) + ((w >> 2) & 0x33);
+
+    return (w + (w >> 4)) & 0x0f;
 }
 
-static inline unsigned long generic_hweight64(__u64 w)
+static inline unsigned int generic_hweight64(uint64_t w)
 {
 #if BITS_PER_LONG < 64
     return generic_hweight32((unsigned int)(w >> 32)) +
         generic_hweight32((unsigned int)w);
 #else
-    u64 res;
-    res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
-    res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
-    res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
-    res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
-    res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
-    return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
+    w -= (w >> 1) & 0x5555555555555555ul;
+    w =  (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
+    w =  (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
+
+    if ( IS_ENABLED(CONFIG_HAS_FAST_MULTIPLY) )
+        return (w * 0x0101010101010101ul) >> 56;
+
+    w += w >> 8;
+    w += w >> 16;
+
+    return (w + (w >> 32)) & 0xFF;
 #endif
 }
 
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.