[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-API] [PATCH] add new functions to efficiently scan for sparseness



# HG changeset patch
# User David Scott <dave.scott@xxxxxxxxxxxxx>
# Date 1279751378 -3600
# Node ID d052a41ffabe74f791a1d62bd178a1cac13dc332
# Parent  542efa53c3e25342a175ead7f9327b972f976820
Add functions Zerocheck.get_next{zero,nonzero} which can be used to detect 
sparseness (relatively efficiently). Both functions primarily use aligned 
unsigned int-sized accesses.

Signed-off-by: David Scott <dave.scott@xxxxxxxxxxxxx>

diff -r 542efa53c3e2 -r d052a41ffabe stdext/zerocheck.ml
--- a/stdext/zerocheck.ml       Wed Jul 21 23:29:15 2010 +0100
+++ b/stdext/zerocheck.ml       Wed Jul 21 23:29:38 2010 +0100
@@ -12,3 +12,30 @@
  * GNU Lesser General Public License for more details.
  *)
 external is_all_zeros : string -> int -> bool = "is_all_zeros"
+
+external _find_a_nonzero : string -> int -> int -> int = "find_a_nonzero"
+external _find_a_zero : string -> int -> int -> int = "find_a_zero"
+
+let wrap f x len offset =
+       let remaining = len - offset in
+       if remaining <= 0 then raise (Invalid_argument "offset > length");
+       let result = f x offset remaining in
+       if result = remaining then None else Some (result + offset)
+
+let find_a_nonzero = wrap _find_a_nonzero
+let find_a_zero = wrap _find_a_zero
+
+let fold_over_nonzeros x len roundup f initial = 
+       let rec inner acc offset = 
+               if offset = len then acc
+               else
+               match find_a_nonzero x len offset with
+               | None -> acc (* no more *)
+               | Some s -> 
+                       let e = match find_a_zero x len s with
+                       | None -> len
+                       | Some e -> e in
+                       let e = min len (roundup e) in
+                       inner (f acc (s, e - s)) e in
+       inner initial 0
+
diff -r 542efa53c3e2 -r d052a41ffabe stdext/zerocheck.mli
--- a/stdext/zerocheck.mli      Wed Jul 21 23:29:15 2010 +0100
+++ b/stdext/zerocheck.mli      Wed Jul 21 23:29:38 2010 +0100
@@ -11,4 +11,24 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *)
+
+(** [is_all_zeroes x len] returns true if the substring is all zeroes *)
 external is_all_zeros : string -> int -> bool = "is_all_zeros"
+
+(** [find_a_zero x len offset] returns the offset in [x] of a zero
+    character after [offset], or None if no zero was detected.
+    Note this function is approximate and is not guaranteed to find
+    strictly the first zero. *)
+val find_a_zero: string -> int -> int -> int option
+
+(** [find_a_nonzero x len offset] returns the offset in [x] of a 
+    nonzero character after [offset], or None if none could be detected.
+    Note this function is approximate and is not guaranteed to find
+    strictly the first nonzero. *)
+val find_a_nonzero: string -> int -> int -> int option
+
+(** [fold_over_nonzeros buf len roundup f initial] folds [f] over all 
+    (start, length) pairs of non-zero data in string [buf] up to [len]. 
+    The end offset of each pair is rounded up with [roundup] (e.g. to 
+    potential block boudaries. *)
+val fold_over_nonzeros: string -> int -> (int -> int) -> ('a -> int * int -> 
'a) -> 'a -> 'a
diff -r 542efa53c3e2 -r d052a41ffabe stdext/zerocheck_stub.c
--- a/stdext/zerocheck_stub.c   Wed Jul 21 23:29:15 2010 +0100
+++ b/stdext/zerocheck_stub.c   Wed Jul 21 23:29:38 2010 +0100
@@ -18,6 +18,88 @@
 #include <caml/signals.h>
 #include <caml/fail.h>
 
+#define OFFSET(s) (((unsigned int)s) & (sizeof(unsigned int) - 1))
+
+
+/* Return the offset of the next non-zero byte (possibly rounded down a bit).
+   The value 'remaining' is returned if there is no non-zero byte found. */
+value find_a_nonzero(value string, value offset, value remaining)
+{
+       CAMLparam3(string, offset, remaining);
+       int c_offset = Int_val(offset);
+       int c_remaining = Int_val(remaining);
+       int c_origremaining = c_remaining;
+       char *c_string = String_val(string);
+       char *s = c_string + c_offset;
+
+       /* Go character by character until we hit an unsigned int boundary */
+       while ((OFFSET(s) != 0) && (c_remaining > 0)){ 
+               if (*s != '\000') goto finish;
+               s++; c_remaining--;
+       }
+       /* Go word by word. Note we don't need to determine the exact position
+          of the nonzero, it suffices to return the index of the word 
containing       
+          the nonzero. */
+       unsigned int *p = (unsigned int *)s;
+       while (c_remaining > 4){
+               if (*p != 0) goto finish;
+               p++; c_remaining-=4;
+       }
+       /* Go character by character until the end of the string */
+       s = (char*) p;
+       while (c_remaining > 0){
+               if (*s != '\000') goto finish;
+               s++; c_remaining--;
+       }
+       /* c_remaining == 0 */
+ finish:
+       /* If we didn't find a nonzero then we return c_origremaining.
+          If we did then we return the number of chars after the starting
+          offset where the word containing the nonzero was detected. */
+       CAMLreturn(Val_int(c_origremaining - c_remaining));
+                              
+}
+
+/* Return the offset of the next zero byte (possibly rounded up a bit).
+   The value 'remaining' is returned if there is no zero byte found. */
+value find_a_zero(value string, value offset, value remaining)
+{
+       CAMLparam3(string, offset, remaining);
+       int c_offset = Int_val(offset);
+       int c_remaining = Int_val(remaining);
+       int c_origremaining = c_remaining;
+       char *c_string = String_val(string);
+       char *s = c_string + c_offset;
+
+       /* Go character by character until we hit an unsigned int boundary */
+       while ((OFFSET(s) != 0) && (c_remaining > 0)){ 
+               if (*s == '\000') goto finish;
+               s++; c_remaining--;
+       }
+       /* Go word by word. Note we don't need to determine the exact position
+          of the zero, it suffices to return the index of the word following   
+          the zero. */
+       unsigned int *p = (unsigned int *)s;
+       while (c_remaining > 4){
+               if (*p == 0) goto finish;
+               p++; c_remaining-=4;
+       }
+       /* Go character by character until the end of the string */
+       s = (char*) p;
+       while (c_remaining > 0){
+               if (*s == '\000') goto finish;
+               s++; c_remaining--;
+       }
+       /* c_remaining == 0 */
+ finish:
+       /* If we didn't find a zero then we return c_origremaining.
+          If we did then we return the number of chars after the starting
+          offset where the word containing the zero was detected. */
+       CAMLreturn(Val_int(c_origremaining - c_remaining));                     
       
+}
+
+
+
 /* for better performance in all case, we should process the unalign data at
  * the beginning until we reach a 32 bit align value, however since ocaml
  * allocate the string and we don't use any offset in this string, the string
 stdext/zerocheck.ml     |  27 ++++++++++++++++
 stdext/zerocheck.mli    |  20 +++++++++++
 stdext/zerocheck_stub.c |  82 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 129 insertions(+), 0 deletions(-)


Attachment: xen-api-libs.hg.patch
Description: Text Data

_______________________________________________
xen-api mailing list
xen-api@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/mailman/listinfo/xen-api

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.