[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 1/3] x86/PoD: correctly handle non-order-0 decrease-reservation requests



On 12/04/2017 11:06 AM, Jan Beulich wrote:
> p2m_pod_decrease_reservation() returning just (not) all-done is not
> sufficient for the caller: If some pages were processed,
> guest_remove_page() returning an error for those pages is the expected
> result rather than an indication of a problem. Make guest_remove_page()
> return a distinct error code for this very case, and special case
> handling in case of seeing this error code in decrease_reservation().

The solution is good, but I think it needs more comments and a better
explanation.

How about:

---
p2m_pod_decrease_reservation() at the moment only returns a boolean
value: true for "nothing more to do", false for "something more to do".
If it returns false, decrease_reservation() will loop over the entire
range, calling guest_remove_page() for each page.

Unfortunately, in the case p2m_pod_decrease_reservation() succeeds
partially, some of the memory in the range will be not-present; at which
point guest_remove_page() will return an error, and the entire operation
will fail.

Fix this by:
1. Having p2m_pod_decrease_reservation() return exactly the number of
gpfn pages it has handled (i.e., replaced with 'not present')
2. Making guest_remove_page() return -ENOENT in the case that the gpfn
in question was already empty (and in no other cases)
3. When looping over guest_remove_page(), expect the number of -ENOENT
failures to be exactly equal to the number of pages
p2m_pod_decrease_reservation() removed.
---

> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> 
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -393,10 +393,10 @@ int guest_physmap_mark_populate_on_deman
>      return -ENOSYS;
>  }
>  
> -int p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn,
> -                                 unsigned int order)
> +unsigned long p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn,
> +                                           unsigned int order)
>  {
> -    return -ENOSYS;
> +    return 0;
>  }
>  
>  static void p2m_set_permission(lpae_t *e, p2m_type_t t, p2m_access_t a)
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -510,11 +510,10 @@ p2m_pod_zero_check_superpage(struct p2m_
>   * Once both of these functions have been completed, we can return and
>   * allow decrease_reservation() to handle everything else.
>   */
> -int
> +unsigned long
>  p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn, unsigned int order)
>  {
> -    int ret = 0;
> -    unsigned long i, n;
> +    unsigned long ret = 0, i, n;
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>      bool_t steal_for_cache;
>      long pod, nonpod, ram;
> @@ -577,9 +576,9 @@ p2m_pod_decrease_reservation(struct doma
>                  domain_crash(d);
>              goto out_unlock;
>          }
> -        p2m->pod.entry_count -= 1UL << order;
> +        ret = 1UL << order;
> +        p2m->pod.entry_count -= ret;
>          BUG_ON(p2m->pod.entry_count < 0);
> -        ret = 1;
>          goto out_entry_check;
>      }
>  
> @@ -630,6 +629,7 @@ p2m_pod_decrease_reservation(struct doma
>              p2m->pod.entry_count -= n;
>              BUG_ON(p2m->pod.entry_count < 0);
>              pod -= n;
> +            ret += n;
>          }
>          else if ( steal_for_cache && p2m_is_ram(t) )
>          {
> @@ -664,16 +664,10 @@ p2m_pod_decrease_reservation(struct doma
>  
>              nonpod -= n;
>              ram -= n;
> +            ret += n;
>          }
>      }
>  
> -    /*
> -     * If there are no more non-PoD entries, tell decrease_reservation() that
> -     * there's nothing left to do.
> -     */
> -    if ( nonpod == 0 )
> -        ret = 1;
> -
>  out_entry_check:
>      /* If we've reduced our "liabilities" beyond our "assets", free some */
>      if ( p2m->pod.entry_count < p2m->pod.count )
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -284,13 +284,15 @@ int guest_remove_page(struct domain *d,
>  
>  #ifdef CONFIG_X86
>      mfn = get_gfn_query(d, gmfn, &p2mt);
> +    if ( unlikely(p2mt == p2m_invalid) || unlikely(p2mt == p2m_mmio_dm) )
> +        return -ENOENT;
>      if ( unlikely(p2m_is_paging(p2mt)) )
>      {
>          rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0);
> -        put_gfn(d, gmfn);
> -
>          if ( rc )
> -            return rc;
> +            goto out_put_gfn;
> +
> +        put_gfn(d, gmfn);
>  
>          /* If the page hasn't yet been paged out, there is an
>           * actual page that needs to be released. */
> @@ -308,9 +310,7 @@ int guest_remove_page(struct domain *d,
>      if ( p2mt == p2m_mmio_direct )
>      {
>          rc = clear_mmio_p2m_entry(d, gmfn, mfn, PAGE_ORDER_4K);
> -        put_gfn(d, gmfn);
> -
> -        return rc;
> +        goto out_put_gfn;
>      }
>  #else
>      mfn = gfn_to_mfn(d, _gfn(gmfn));
> @@ -335,10 +335,8 @@ int guest_remove_page(struct domain *d,
>          rc = mem_sharing_unshare_page(d, gmfn, 0);
>          if ( rc )
>          {
> -            put_gfn(d, gmfn);
>              (void)mem_sharing_notify_enomem(d, gmfn, 0);
> -
> -            return rc;
> +            goto out_put_gfn;
>          }
>          /* Maybe the mfn changed */
>          mfn = get_gfn_query_unlocked(d, gmfn, &p2mt);
> @@ -375,9 +373,10 @@ int guest_remove_page(struct domain *d,
>          put_page(page);
>  
>      put_page(page);
> + out_put_gfn: __maybe_unused
>      put_gfn(d, gmfn);
>  
> -    return rc;
> +    return rc != -ENOENT ? rc : -EINVAL;

/*
 * Filter out -ENOENT return values that aren't a result of an
 * empty p2m entry
 */

>  }
>  
>  static void decrease_reservation(struct memop_args *a)
> @@ -392,6 +391,8 @@ static void decrease_reservation(struct
>  
>      for ( i = a->nr_done; i < a->nr_extents; i++ )
>      {
> +        unsigned long pod_done;
> +
>          if ( i != a->nr_done && hypercall_preempt_check() )
>          {
>              a->preempted = 1;
> @@ -416,14 +417,25 @@ static void decrease_reservation(struct
>          }
>  
>          /* See if populate-on-demand wants to handle this */
> -        if ( is_hvm_domain(a->domain)
> -             && p2m_pod_decrease_reservation(a->domain, _gfn(gmfn),
> -                                             a->extent_order) )
> -            continue;
> +        pod_done = is_hvm_domain(a->domain) ?
> +                   p2m_pod_decrease_reservation(a->domain, _gfn(gmfn),
> +                                                a->extent_order) : 0;
>  
> -        for ( j = 0; j < (1 << a->extent_order); j++ )
> -            if ( guest_remove_page(a->domain, gmfn + j) )

/*
 * Look for pages not handled by p2m_pod_decrease_reservation().
 *
 * guest_remove_page() will return -ENOENT for pages which have already
 * been removed by p2m_pod_decrease_reservation(); so expect to see
 * exactly pod_done failures.  Any more means that there were invalid
 * entries before p2m_pod_decrease_reservation() was called.
 */

> +        for ( j = 0; j + pod_done < (1UL << a->extent_order); j++ )
> +        {
> +            switch ( guest_remove_page(a->domain, gmfn + j) )
> +            {
> +            case 0:
> +                break;
> +            case -ENOENT:
> +                if ( !pod_done )
> +                    goto out;
> +                --pod_done;
> +                break;
> +            default:
>                  goto out;
> +            }
> +        }
>      }

What about:

ASSERT(pod_done == 0);

Thanks,
 -George

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.