
Re: [Minios-devel] [UNIKRAFT PATCH v2] lib/ukallocbbuddy: Correct region bitmap allocation and usage



Hello,

Please find the comments inline

On 06/25/2018 01:11 PM, Sharan Santhanam wrote:
Hello,

Please find some comments inline:

On 06/23/2018 07:14 PM, Costin Lupu wrote:
Hi Yuri,

Thanks a lot for your comments. Please see my replies inline.

On 06/22/2018 08:03 PM, Yuri Volchkov wrote:
Hey Costin,

there are some comments inline.

-Yuri.

Costin Lupu <costin.lupu@xxxxxxxxx> writes:

The usage of each memory region that is added to the binary
buddy allocator is tracked with a bitmap. This patch corrects
the wrong size calculation for the bitmap and wrong calculations
of bit positions.

Signed-off-by: Costin Lupu <costin.lupu@xxxxxxxxx>
---
  lib/ukallocbbuddy/bbuddy.c | 83 +++++++++++++++++++++++++++++++---------------
  1 file changed, 57 insertions(+), 26 deletions(-)

diff --git a/lib/ukallocbbuddy/bbuddy.c b/lib/ukallocbbuddy/bbuddy.c
index 20a9b70..63288f0 100644
--- a/lib/ukallocbbuddy/bbuddy.c
+++ b/lib/ukallocbbuddy/bbuddy.c
@@ -69,7 +69,7 @@ struct uk_bbpalloc_memr {
      unsigned long first_page;
      unsigned long nr_pages;
      unsigned long mm_alloc_bitmap_size;
-    unsigned long mm_alloc_bitmap[];
+    unsigned long *mm_alloc_bitmap;
  };
  struct uk_bbpalloc {
@@ -93,10 +93,11 @@ struct uk_bbpalloc {
   *  *_idx == Index into `mm_alloc_bitmap' array.
   *  *_off == Bit offset within an element of the `mm_alloc_bitmap' array.
   */
-#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
+#define BITS_PER_BYTE     8
+#define PAGES_PER_MAPWORD (sizeof(unsigned long) * BITS_PER_BYTE)
  static inline struct uk_bbpalloc_memr *map_get_memr(struct uk_bbpalloc *b,
-                            unsigned long page_num)
+                            unsigned long page_va)
  {
      struct uk_bbpalloc_memr *memr = NULL;
@@ -106,8 +107,9 @@ static inline struct uk_bbpalloc_memr *map_get_memr(struct uk_bbpalloc *b,
       * of them. It should be just one region in most cases
       */
      for (memr = b->memr_head; memr != NULL; memr = memr->next) {
-        if ((page_num >= memr->first_page)
-            && (page_num < (memr->first_page + memr->nr_pages)))
+        if ((page_va >= memr->first_page)
+            && (page_va < (memr->first_page +
+            memr->nr_pages * __PAGE_SIZE)))
It is not a huge performance improvement, but it is better to use
    memr->nr_pages << __PAGE_SHIFT

Agreed, although I believe it is easier to understand it using the
multiplication.

              return memr;
      }
@@ -117,24 +119,29 @@ static inline struct uk_bbpalloc_memr *map_get_memr(struct uk_bbpalloc *b,
      return NULL;
  }
-static inline int allocated_in_map(struct uk_bbpalloc *b,
-                   unsigned long page_num)
+static inline unsigned long allocated_in_map(struct uk_bbpalloc *b,
+                   unsigned long page_va)
  {
-    struct uk_bbpalloc_memr *memr = map_get_memr(b, page_num);
+    struct uk_bbpalloc_memr *memr = map_get_memr(b, page_va);
+    unsigned long page_idx;
+    unsigned long bm_idx, bm_off;
      /* treat pages outside of region as allocated */
      if (!memr)
          return 1;
-    page_num -= memr->first_page;
-    return ((memr)->mm_alloc_bitmap[(page_num) / PAGES_PER_MAPWORD]
-        & (1UL << ((page_num) & (PAGES_PER_MAPWORD - 1))));
+    page_idx = (page_va - memr->first_page) / __PAGE_SIZE;
+    bm_idx = page_idx / PAGES_PER_MAPWORD;
+    bm_off = page_idx & (PAGES_PER_MAPWORD - 1);
+
+    return ((memr)->mm_alloc_bitmap[bm_idx] & (1UL << bm_off));
  }
  static void map_alloc(struct uk_bbpalloc *b, uintptr_t first_page,
                unsigned long nr_pages)
  {
      struct uk_bbpalloc_memr *memr;
+    unsigned long first_page_idx, end_page_idx;
      unsigned long start_off, end_off, curr_idx, end_idx;
      /*
@@ -144,14 +151,16 @@ static void map_alloc(struct uk_bbpalloc *b, uintptr_t first_page,
       */
      memr = map_get_memr(b, first_page);
      UK_ASSERT(memr != NULL);
-    UK_ASSERT((first_page + nr_pages)
-          <= (memr->first_page + memr->nr_pages));
+    UK_ASSERT((first_page + nr_pages * __PAGE_SIZE)
+          <= (memr->first_page + memr->nr_pages * __PAGE_SIZE));
      first_page -= memr->first_page;
-    curr_idx = first_page / PAGES_PER_MAPWORD;
-    start_off = first_page & (PAGES_PER_MAPWORD - 1);
-    end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
-    end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD - 1);
+    first_page_idx = first_page / __PAGE_SIZE;
+    curr_idx = first_page_idx / PAGES_PER_MAPWORD;
+    start_off = first_page_idx & (PAGES_PER_MAPWORD - 1);
+    end_page_idx = first_page_idx + nr_pages;
+    end_idx = end_page_idx / PAGES_PER_MAPWORD;
+    end_off = end_page_idx & (PAGES_PER_MAPWORD - 1);
Same preference of shifts over divisions. I will stop pointing it out here,
but I think using shifts in other places in the code would improve
performance a little bit, as well as readability.

Actually, in my opinion the division is more readable than the shifting.
Other than that, I agree.


In this case, I agree with Yuri: bit shifting and bit masking make it easier to comprehend.

Since we are repeatedly using these operations to find the index, offset and start of a page across this library, it might be wise to introduce macros or inline functions.
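For example (a sketch only; these helper names are invented here and are not part of the patch):

    /* Hypothetical helpers for the recurring bitmap arithmetic */
    #define PAGE_IDX(memr, va)  (((va) - (memr)->first_page) >> __PAGE_SHIFT)
    #define MAP_IDX(page_idx)   ((page_idx) / PAGES_PER_MAPWORD)
    #define MAP_OFF(page_idx)   ((page_idx) & (PAGES_PER_MAPWORD - 1))

so that, for instance, allocated_in_map() could end with:

    page_idx = PAGE_IDX(memr, page_va);
    return memr->mm_alloc_bitmap[MAP_IDX(page_idx)]
           & (1UL << MAP_OFF(page_idx));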
      if (curr_idx == end_idx) {
          memr->mm_alloc_bitmap[curr_idx] |=
@@ -170,6 +179,7 @@ static void map_free(struct uk_bbpalloc *b, uintptr_t first_page,
               unsigned long nr_pages)
  {
      struct uk_bbpalloc_memr *memr;
+    unsigned long first_page_idx, end_page_idx;
      unsigned long start_off, end_off, curr_idx, end_idx;
      /*
@@ -179,14 +189,16 @@ static void map_free(struct uk_bbpalloc *b, uintptr_t first_page,
       */
      memr = map_get_memr(b, first_page);
      UK_ASSERT(memr != NULL);
-    UK_ASSERT((first_page + nr_pages)
-          <= (memr->first_page + memr->nr_pages));
+    UK_ASSERT((first_page + nr_pages * __PAGE_SIZE)
+          <= (memr->first_page + memr->nr_pages * __PAGE_SIZE));
      first_page -= memr->first_page;
-    curr_idx = first_page / PAGES_PER_MAPWORD;
-    start_off = first_page & (PAGES_PER_MAPWORD - 1);
-    end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
-    end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD - 1);
+    first_page_idx = first_page / __PAGE_SIZE;
+    curr_idx = first_page_idx / PAGES_PER_MAPWORD;
+    start_off = first_page_idx & (PAGES_PER_MAPWORD - 1);
+    end_page_idx = first_page_idx + nr_pages;
+    end_idx = end_page_idx / PAGES_PER_MAPWORD;
+    end_off = end_page_idx & (PAGES_PER_MAPWORD - 1);
      if (curr_idx == end_idx) {
          memr->mm_alloc_bitmap[curr_idx] &=
@@ -345,10 +357,25 @@ static int bbuddy_addmem(struct uk_alloc *a, void *base, size_t len)
      min = round_pgup((uintptr_t)base);
      max = round_pgdown((uintptr_t)base + (uintptr_t)len);

In the corner case where len is less than a page, the round_pgup and round_pgdown macros would cause the range to be negative. Should we not perform a check on the range and report an error in case it is negative? In the subsequent lines, we are writing to memory outside the range.
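A minimal sketch of the suggested check (illustration only, not part of this patch; the error value is just an example):

    /* min and max are page aligned, so max <= min means the region
     * does not contain even one full page and must not be touched.
     */
    if (max <= min)
        return -EINVAL;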
      range = max - min;
-    memr_size =
-        round_pgup(sizeof(*memr) + DIV_ROUND_UP(range >> __PAGE_SHIFT, 8));
      memr = (struct uk_bbpalloc_memr *)min;
+
+    /*
+     * The number of pages is found by solving the inequality:
+     *
+     * sizeof(*memr) + bitmap_size + page_num * page_size <= range
+     *
+     * where: bitmap_size = page_num / BITS_PER_BYTE
+     *
+     */
+    memr->nr_pages =
+        BITS_PER_BYTE * (range - sizeof(*memr)) /
+        (BITS_PER_BYTE * __PAGE_SIZE + 1);
I had a bad time trying to understand this math. I would like to propose
a comment like this here
/* The available amount of memory in bits is:
  * BITS_PER_BYTE * (range - sizeof(*memr))
  *
  * Every page occupies one bit in the bitmap. So total number of bits
  * used by one page is:
  * BITS_PER_BYTE * __PAGE_SIZE + 1
  */

Unfortunately these comments are not quite right. They try to explain
the numerator and denominator, but the fraction is the result found by
solving the inequality.
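
For reference, solving the inequality for page_num step by step:

    sizeof(*memr) + page_num / BITS_PER_BYTE + page_num * __PAGE_SIZE <= range

multiplying both sides by BITS_PER_BYTE:

    BITS_PER_BYTE * sizeof(*memr) + page_num * (BITS_PER_BYTE * __PAGE_SIZE + 1) <= BITS_PER_BYTE * range

    page_num <= BITS_PER_BYTE * (range - sizeof(*memr)) / (BITS_PER_BYTE * __PAGE_SIZE + 1)

which is exactly the fraction computed in the patch.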


I believe we can merge the two comments together, as the suggested comment explains the intention of the subsequent code. The inequality is used in the following code snippet:
    min += memr_size;
    range -= memr_size;
    if (max < min)

+    memr->mm_alloc_bitmap = (unsigned long *) (min + sizeof(*memr));
+    memr->mm_alloc_bitmap_size  =
+        round_pgup(memr->nr_pages / BITS_PER_BYTE) - sizeof(*memr);
I think I found a problem in the math..

Let's assume this function is called with len=132952064 (32459 pages). In
this case memr->nr_pages=32457,

Following the formula, you should get memr->nr_pages=32458, and for that
memr->mm_alloc_bitmap_size=4058.

memr->mm_alloc_bitmap_size=4056. However, to represent 32457 pages we
are going to need 32457/8 = 4057.125, rounded up to 4058 bytes.

That's right, 4058 is what you should get.
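
Just to illustrate the rounding being discussed (a sketch only; bitmap_bytes is an illustrative variable, and this is not the layout the patch actually computes, which also pads the metadata up to a page boundary). It reuses the DIV_ROUND_UP macro from the removed line above:

    /* whole bytes needed so that nr_pages bits fit,
     * e.g. 32457 pages -> DIV_ROUND_UP(32457, 8) = 4058 bytes
     */
    bitmap_bytes = DIV_ROUND_UP(memr->nr_pages, BITS_PER_BYTE);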

This math probably could be replaced with an easier one. Currently it is
a bit too complicated. It is difficult to verify, and quite easy to make
a mistake.

Here is an idea. What if we approach the problem from the other side? We
know how many pages one page of a bitmap can handle. I wrote a small
snippet to demonstrate:

#define BITMAP_FIRST_PAGE_BYTES ( __PAGE_SIZE -                \
                  sizeof(struct uk_bbpalloc_memr))
#define BITMAP_FIRST_PAGE_BITS ((ssize_t) (BITMAP_FIRST_PAGE_BYTES << 3))
#define BITS_PER_PAGE (__PAGE_SIZE << 3)

    ssize_t memr_pg_num = 1;
    ssize_t rem_pages = range >> __PAGE_SHIFT;

    /* The very first page is special - it is shared between memr
     * and the initial portion of the bitmap.
     *
     * Since this page is already taken from the page budget, we
     * decrease the number of pages we should care about.
     */
    if (rem_pages > BITMAP_FIRST_PAGE_BITS) {
        /* We have a bitmap that is capable of handling
         * BITMAP_FIRST_PAGE_BITS pages anyway. If we are
         * here, that was not enough.
         */

        /* We do not need to care about pages handled by
         * the bitmap in the first page
         */
        rem_pages -= BITMAP_FIRST_PAGE_BITS;

        /* To handle the remaining part we are going to need
         * (rem_pages / BITS_PER_PAGE) pages. But with every
         * next bitmap page, the number of usable pages
         * reduces by 1. That is why we actually need to
         * divide by (BITS_PER_PAGE + 1)
         */
        memr_pg_num += (rem_pages + BITS_PER_PAGE - 1) /
            (BITS_PER_PAGE + 1);
    }


What do you think?

To me this solution looks more complicated. Actually it is a whole
different approach.

If you want to understand my proposed fix, the focus should be on the
inequality (understanding it, finding the value for page_num and comparing
the result with what's in the code).


I prefer this solution as it is more readable. The suggested change is more about how we could improve the allocator implementation and does not affect the purpose of this patch.

+    memr_size = sizeof(*memr) + memr->mm_alloc_bitmap_size;
+
      min += memr_size;
      range -= memr_size;
      if (max < min) {
@@ -362,10 +389,14 @@ static int bbuddy_addmem(struct uk_alloc *a, void *base, size_t len)
       * Initialize region's bitmap
       */
      memr->first_page = min;
-    memr->nr_pages = max - min;
      /* add to list */
      memr->next = b->memr_head;
      b->memr_head = memr;
+
+    /* All allocated by default. */
+    memset(memr->mm_alloc_bitmap, (unsigned char) ~0,
+            memr->mm_alloc_bitmap_size);
Very minor thing. Probably '0xff' is nicer than '(unsigned char) ~0'

Please explain why it would be nicer. For sure it is not more portable.

+
      /* free up the memory we've been given to play with */
      map_free(b, min, (unsigned long)(range >> __PAGE_SHIFT));
--
2.11.0


Thanks,
Costin


Thanks & Regards
Sharan Santhanam


Thanks & Regards
Sharan Santhanam

_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 

