
Re: [Minios-devel] [UNIKRAFT PATCH v2] lib/ukallocbbuddy: Correct region bitmap allocation and usage



Costin Lupu <costin.lupu@xxxxxxxxx> writes:

> Hi Yuri,
>
> Thanks a lot for your comments. Please see my replies inline.
>
> On 06/22/2018 08:03 PM, Yuri Volchkov wrote:
>> Hey Costin,
>> 
>> there are some comments inline.
>> 
>> -Yuri.
>> 
>> Costin Lupu <costin.lupu@xxxxxxxxx> writes:
>> 
>>> The usage of each memory region that is added to the binary
>>> buddy allocator is tracked with a bitmap. This patch corrects
>>> a wrong size calculation for the bitmap and wrong calculations
>>> of bit positions.
>>>
>>> Signed-off-by: Costin Lupu <costin.lupu@xxxxxxxxx>
>>> ---
>>>  lib/ukallocbbuddy/bbuddy.c | 83 +++++++++++++++++++++++++++++++---------------
>>>  1 file changed, 57 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/lib/ukallocbbuddy/bbuddy.c b/lib/ukallocbbuddy/bbuddy.c
>>> index 20a9b70..63288f0 100644
>>> --- a/lib/ukallocbbuddy/bbuddy.c
>>> +++ b/lib/ukallocbbuddy/bbuddy.c
>>> @@ -69,7 +69,7 @@ struct uk_bbpalloc_memr {
>>>     unsigned long first_page;
>>>     unsigned long nr_pages;
>>>     unsigned long mm_alloc_bitmap_size;
>>> -   unsigned long mm_alloc_bitmap[];
>>> +   unsigned long *mm_alloc_bitmap;
>>>  };
>>>  
>>>  struct uk_bbpalloc {
>>> @@ -93,10 +93,11 @@ struct uk_bbpalloc {
>>>   *  *_idx == Index into `mm_alloc_bitmap' array.
>>>   *  *_off == Bit offset within an element of the `mm_alloc_bitmap' array.
>>>   */
>>> -#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
>>> +#define BITS_PER_BYTE     8
>>> +#define PAGES_PER_MAPWORD (sizeof(unsigned long) * BITS_PER_BYTE)
>>>  
>>>  static inline struct uk_bbpalloc_memr *map_get_memr(struct uk_bbpalloc *b,
>>> -                                               unsigned long page_num)
>>> +                                               unsigned long page_va)
>>>  {
>>>     struct uk_bbpalloc_memr *memr = NULL;
>>>  
>>> @@ -106,8 +107,9 @@ static inline struct uk_bbpalloc_memr *map_get_memr(struct uk_bbpalloc *b,
>>>      * of them. It should be just one region in most cases
>>>      */
>>>     for (memr = b->memr_head; memr != NULL; memr = memr->next) {
>>> -           if ((page_num >= memr->first_page)
>>> -               && (page_num < (memr->first_page + memr->nr_pages)))
>>> +           if ((page_va >= memr->first_page)
>>> +               && (page_va < (memr->first_page +
>>> +               memr->nr_pages * __PAGE_SIZE)))
>> It is not a huge performance improvement, but it is better to use
>>    memr->nr_pages << __PAGE_SHIFT
>
> Agreed, although I believe it is easier to understand using the
> multiplication.
The good thing about a shift is that it shows intention: a shift implies
multiplication by a power of 2, which is exactly what you want here.

And this is actually common practice in many other OSes; for example,
shifts are used all over the Linux code.
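
For example (just a sketch; this assumes __PAGE_SIZE is defined as
(1UL << __PAGE_SHIFT), so the two forms compute the same value):

    /* end of the region: the shift expresses "nr_pages times the
     * page size, a power of 2" directly
     */
    if (page_va < memr->first_page + (memr->nr_pages << __PAGE_SHIFT))
            return memr;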

>
>>>                     return memr;
>>>     }
>>>  
>>> @@ -117,24 +119,29 @@ static inline struct uk_bbpalloc_memr *map_get_memr(struct uk_bbpalloc *b,
>>>     return NULL;
>>>  }
>>>  
>>> -static inline int allocated_in_map(struct uk_bbpalloc *b,
>>> -                              unsigned long page_num)
>>> +static inline unsigned long allocated_in_map(struct uk_bbpalloc *b,
>>> +                              unsigned long page_va)
>>>  {
>>> -   struct uk_bbpalloc_memr *memr = map_get_memr(b, page_num);
>>> +   struct uk_bbpalloc_memr *memr = map_get_memr(b, page_va);
>>> +   unsigned long page_idx;
>>> +   unsigned long bm_idx, bm_off;
>>>  
>>>     /* treat pages outside of region as allocated */
>>>     if (!memr)
>>>             return 1;
>>>  
>>> -   page_num -= memr->first_page;
>>> -   return ((memr)->mm_alloc_bitmap[(page_num) / PAGES_PER_MAPWORD]
>>> -           & (1UL << ((page_num) & (PAGES_PER_MAPWORD - 1))));
>>> +   page_idx = (page_va - memr->first_page) / __PAGE_SIZE;
>>> +   bm_idx = page_idx / PAGES_PER_MAPWORD;
>>> +   bm_off = page_idx & (PAGES_PER_MAPWORD - 1);
>>> +
>>> +   return ((memr)->mm_alloc_bitmap[bm_idx] & (1UL << bm_off));
>>>  }
>>>  
>>>  static void map_alloc(struct uk_bbpalloc *b, uintptr_t first_page,
>>>                   unsigned long nr_pages)
>>>  {
>>>     struct uk_bbpalloc_memr *memr;
>>> +   unsigned long first_page_idx, end_page_idx;
>>>     unsigned long start_off, end_off, curr_idx, end_idx;
>>>  
>>>     /*
>>> @@ -144,14 +151,16 @@ static void map_alloc(struct uk_bbpalloc *b, uintptr_t first_page,
>>>      */
>>>     memr = map_get_memr(b, first_page);
>>>     UK_ASSERT(memr != NULL);
>>> -   UK_ASSERT((first_page + nr_pages)
>>> -             <= (memr->first_page + memr->nr_pages));
>>> +   UK_ASSERT((first_page + nr_pages * __PAGE_SIZE)
>>> +             <= (memr->first_page + memr->nr_pages * __PAGE_SIZE));
>>>  
>>>     first_page -= memr->first_page;
>>> -   curr_idx = first_page / PAGES_PER_MAPWORD;
>>> -   start_off = first_page & (PAGES_PER_MAPWORD - 1);
>>> -   end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
>>> -   end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD - 1);
>>> +   first_page_idx = first_page / __PAGE_SIZE;
>>> +   curr_idx = first_page_idx / PAGES_PER_MAPWORD;
>>> +   start_off = first_page_idx & (PAGES_PER_MAPWORD - 1);
>>> +   end_page_idx = first_page_idx + nr_pages;
>>> +   end_idx = end_page_idx / PAGES_PER_MAPWORD;
>>> +   end_off = end_page_idx & (PAGES_PER_MAPWORD - 1);
>> Same preference for shifts over divisions. I will stop pointing it
>> out here, but I think using shifts in other places in the code would
>> improve performance and readability a little bit.
>
> Actually, in my opinion the division is more readable than the shift.
> Other than that, I agree.
>
>>>  
>>>     if (curr_idx == end_idx) {
>>>             memr->mm_alloc_bitmap[curr_idx] |=
>>> @@ -170,6 +179,7 @@ static void map_free(struct uk_bbpalloc *b, uintptr_t first_page,
>>>                  unsigned long nr_pages)
>>>  {
>>>     struct uk_bbpalloc_memr *memr;
>>> +   unsigned long first_page_idx, end_page_idx;
>>>     unsigned long start_off, end_off, curr_idx, end_idx;
>>>  
>>>     /*
>>> @@ -179,14 +189,16 @@ static void map_free(struct uk_bbpalloc *b, uintptr_t first_page,
>>>      */
>>>     memr = map_get_memr(b, first_page);
>>>     UK_ASSERT(memr != NULL);
>>> -   UK_ASSERT((first_page + nr_pages)
>>> -             <= (memr->first_page + memr->nr_pages));
>>> +   UK_ASSERT((first_page + nr_pages * __PAGE_SIZE)
>>> +             <= (memr->first_page + memr->nr_pages * __PAGE_SIZE));
>>>  
>>>     first_page -= memr->first_page;
>>> -   curr_idx = first_page / PAGES_PER_MAPWORD;
>>> -   start_off = first_page & (PAGES_PER_MAPWORD - 1);
>>> -   end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD;
>>> -   end_off = (first_page + nr_pages) & (PAGES_PER_MAPWORD - 1);
>>> +   first_page_idx = first_page / __PAGE_SIZE;
>>> +   curr_idx = first_page_idx / PAGES_PER_MAPWORD;
>>> +   start_off = first_page_idx & (PAGES_PER_MAPWORD - 1);
>>> +   end_page_idx = first_page_idx + nr_pages;
>>> +   end_idx = end_page_idx / PAGES_PER_MAPWORD;
>>> +   end_off = end_page_idx & (PAGES_PER_MAPWORD - 1);
>>>  
>>>     if (curr_idx == end_idx) {
>>>             memr->mm_alloc_bitmap[curr_idx] &=
>>> @@ -345,10 +357,25 @@ static int bbuddy_addmem(struct uk_alloc *a, void *base, size_t len)
>>>     min = round_pgup((uintptr_t)base);
>>>     max = round_pgdown((uintptr_t)base + (uintptr_t)len);
>>>     range = max - min;
>>> -   memr_size =
>>> -       round_pgup(sizeof(*memr) + DIV_ROUND_UP(range >> __PAGE_SHIFT, 8));
>>>  
>>>     memr = (struct uk_bbpalloc_memr *)min;
>>> +
>>> +   /*
>>> +    * The number of pages is found by solving the inequality:
>>> +    *
>>> +    * sizeof(*memr) + bitmap_size + page_num * page_size <= range
>>> +    *
>>> +    * where: bitmap_size = page_num / BITS_PER_BYTE
>>> +    *
>>> +    */
>>> +   memr->nr_pages =
>>> +           BITS_PER_BYTE * (range - sizeof(*memr)) /
>>> +           (BITS_PER_BYTE * __PAGE_SIZE + 1);
>> I had a hard time trying to understand this math. I would like to
>> propose a comment like this here:
>> /* The available amount of memory in bits is:
>>  * BITS_PER_BYTE * (range - sizeof(*memr))
>>  *
>>  * Every page occupies one bit in the bitmap. So total number of bits
>>  * used by one page is:
>>  * BITS_PER_BYTE * __PAGE_SIZE + 1
>>  */
>
> Unfortunately these comments are not quite right. They try to explain
> the numerator and denominator, but the fraction is the result of
> solving the inequality.
Alright. But it would be great to see in the code how you got from the
inequality to the formula.

Fortunately we have several people working on this project. It would be
great if anybody could easily understand/verify/modify the logic without
contacting you, or spending an extra hour solving the inequality on
paper.

Maybe my explanation does not answer exactly how you got the formula.
But I think it does show the physical meaning of it (or was it
completely wrong?).
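
For reference, this is how I reconstruct the derivation (writing n for
page_num and ignoring integer truncation). Starting from the inequality
in the patch comment:

    sizeof(*memr) + n / BITS_PER_BYTE + n * __PAGE_SIZE <= range

multiply both sides by BITS_PER_BYTE:

    BITS_PER_BYTE * sizeof(*memr) + n + n * BITS_PER_BYTE * __PAGE_SIZE
        <= BITS_PER_BYTE * range

then collect the n terms and move everything else to the right:

    n * (BITS_PER_BYTE * __PAGE_SIZE + 1)
        <= BITS_PER_BYTE * (range - sizeof(*memr))

which gives exactly the expression assigned to memr->nr_pages. Having a
comment along these lines next to the code would already help a lot.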

>
>>> +   memr->mm_alloc_bitmap = (unsigned long *) (min + sizeof(*memr));
>>> +   memr->mm_alloc_bitmap_size  =
>>> +           round_pgup(memr->nr_pages / BITS_PER_BYTE) - sizeof(*memr);
>> I think I found a problem in the math...
>> 
>> Let's assume this function is called with len=132952064 (32459 pages). In
>> this case memr->nr_pages=32457,
>
> Following the formula, you should get memr->nr_pages=32458, and for that
> memr->mm_alloc_bitmap_size=4058.
>
>> memr->mm_alloc_bitmap_size=4056. However, to represent 32457 pages we
>> are going to need 32457/8 = 4057.125, i.e. 4058 bytes.
>
> That's right, 4058 is what you should get.
But what you got is 4056. This was not just a mental exercise; I tried
it on the actual code. I set a breakpoint at the function entry and set
len to 132952064.
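
Whatever the exact numbers, the underlying problem is the rounding
direction: the number of bitmap bytes has to be rounded up, never
truncated. A minimal sketch of just the rounding, reusing the
DIV_ROUND_UP helper the old code already used (not the full
mm_alloc_bitmap_size calculation):

    /* one bit per page; round up so the last 1..7 pages do not
     * fall off the end of the bitmap
     */
    bitmap_bytes = DIV_ROUND_UP(memr->nr_pages, BITS_PER_BYTE);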

>
>> This math could probably be replaced with an easier one. Currently it
>> is a bit too complicated: it is difficult to verify and quite easy to
>> get wrong.
>> 
>> Here is an idea. What if we approach the problem from the other side?
>> We know how many pages one page of a bitmap can handle. I wrote a
>> small snippet to demonstrate:
>> 
>> #define BITMAP_FIRST_PAGE_BYTES (__PAGE_SIZE - \
>>                                sizeof(struct uk_bbpalloc_memr))
>> #define BITMAP_FIRST_PAGE_BITS ((ssize_t) (BITMAP_FIRST_PAGE_BYTES << 3))
>> #define BITS_PER_PAGE (__PAGE_SIZE << 3)
>> 
>>      ssize_t memr_pg_num = 1;
>>      ssize_t rem_pages = range >> __PAGE_SHIFT;
>> 
>>      /* The very first page is special: it is shared between memr
>>       * and the initial portion of the bitmap.
>>       *
>>       * Since this page is already taken from the page budget, we
>>       * decrease the number of pages we should care about.
>>       */
>>      if (rem_pages > BITMAP_FIRST_PAGE_BITS) {
>>              /* We have a bitmap that is capable of handling
>>               * BITMAP_FIRST_PAGE_BITS pages anyway. If we got
>>               * here, that was not enough.
>>               */
>> 
>>              /* We do not need to care about pages handled by
>>               * the bitmap in the first page
>>               */
>>              rem_pages -= BITMAP_FIRST_PAGE_BITS;
>> 
>>              /* To handle the remaining part we are going to need
>>               * (rem_pages / BITS_PER_PAGE) pages. But with every
>>               * next bitmap page, the number of usable pages is
>>               * reduced by 1. That is why we actually need to
>>               * divide by (BITS_PER_PAGE + 1)
>>               */
>>              memr_pg_num += (rem_pages + BITS_PER_PAGE - 1) /
>>                      (BITS_PER_PAGE + 1);
>>      }
>> 
>> 
>> What do you think?
>
> To me this solution looks more complicated. Actually it is a whole
> different approach.
>
> If you want to understand my proposed fix, the focus should be on the
> inequality (understanding it, finding the value for page_num and
> comparing the result with what's in the code).
I understood it. I just did not like the amount of effort I spent on
that :).


>
>>> +   memr_size = sizeof(*memr) + memr->mm_alloc_bitmap_size;
>>> +
>>>     min += memr_size;
>>>     range -= memr_size;
>>>     if (max < min) {
>>> @@ -362,10 +389,14 @@ static int bbuddy_addmem(struct uk_alloc *a, void *base, size_t len)
>>>      * Initialize region's bitmap
>>>      */
>>>     memr->first_page = min;
>>> -   memr->nr_pages = max - min;
>>>     /* add to list */
>>>     memr->next = b->memr_head;
>>>     b->memr_head = memr;
>>> +
>>> +   /* All allocated by default. */
>>> +   memset(memr->mm_alloc_bitmap, (unsigned char) ~0,
>>> +                   memr->mm_alloc_bitmap_size);
>> Very minor thing. Probably '0xff' is nicer than '(unsigned char) ~0'
>
> Please explain why it would be nicer. For sure it is not more
> portable.
An unsigned char is always one byte, and 0xff is just shorter. But it is
OK for me to leave it like this.
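
For what it is worth, memset() converts its fill value to unsigned char
internally, so (on the usual platforms where CHAR_BIT is 8) both
spellings store the same 0xff bytes:

    memset(memr->mm_alloc_bitmap, 0xff, memr->mm_alloc_bitmap_size);
    memset(memr->mm_alloc_bitmap, (unsigned char) ~0,
                    memr->mm_alloc_bitmap_size);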

>
>>> +
>>>     /* free up the memory we've been given to play with */
>>>     map_free(b, min, (unsigned long)(range >> __PAGE_SHIFT));
>>>  
>>> -- 
>>> 2.11.0
>>>
>
> Thanks,
> Costin

-- 
Yuri Volchkov
Software Specialist

NEC Europe Ltd
Kurfürsten-Anlage 36
D-69115 Heidelberg

_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 

