
Re: [Xen-devel] [PATCH v3] x86/mm: Add mem access rights to NPT



On Wed, 2018-07-18 at 15:33 +0000, George Dunlap wrote:
> 
> > 
> > On Jul 2, 2018, at 8:42 AM, Alexandru Isaila <aisaila@bitdefender.com> wrote:
> > 
> > From: Isaila Alexandru <aisaila@xxxxxxxxxxxxxxx>
> > 
> > This patch adds access rights for the NPT pages. The access rights
> > are
> > saved in a radix tree with the root saved in p2m_domain. The rights
> > are manipulated through p2m_set_access()
> > and p2m_get_access() functions.
> > The patch follows the ept logic.
> This description needs to be much more complete.  Something like
> this:
> 
> ---
> This patch adds access control for NPT mode.  
> 
> There aren’t enough extra bits to store the access rights in the NPT
> p2m table, so we add a radix tree to store the rights.  For
> efficiency, remove entries which match the default permissions rather
> than continuing to store them.
> 
> Modify p2m-pt.c:p2m_type_to_flags() to mirror the ept version: taking
> an access, and removing / adding RW or NX flags as appropriate.
> ---
> 
I will update the patch description.
> > 
> > 
> > Note: It was tested with xen-access write
> > 
> > Signed-off-by: Alexandru Isaila <aisaila@xxxxxxxxxxxxxxx>
> 
> > 
> > 
> > ---
> > Changes since V2:
> >     - Delete blank line
> >     - Add return if p2m_access_rwx = a
> >     - Delete the comment from p2m_pt_get_entry()
> >     - Moved radix_tree_init() to arch_monitor_init_domain().
> > ---
> > xen/arch/x86/mm/mem_access.c     |   3 ++
> > xen/arch/x86/mm/p2m-pt.c         | 109
> > ++++++++++++++++++++++++++++++++++-----
> > xen/arch/x86/mm/p2m.c            |   6 +++
> > xen/arch/x86/monitor.c           |  13 +++++
> > xen/include/asm-x86/mem_access.h |   2 +-
> > xen/include/asm-x86/p2m.h        |   6 +++
> > 6 files changed, 124 insertions(+), 15 deletions(-)
> > 
> > diff --git a/xen/arch/x86/mm/mem_access.c
> > b/xen/arch/x86/mm/mem_access.c
> > index c0cd017..d78c82c 100644
> > --- a/xen/arch/x86/mm/mem_access.c
> > +++ b/xen/arch/x86/mm/mem_access.c
> > @@ -221,7 +221,10 @@ bool p2m_mem_access_check(paddr_t gpa,
> > unsigned long gla,
> >         {
> >             req->u.mem_access.flags |= MEM_ACCESS_GLA_VALID;
> >             req->u.mem_access.gla = gla;
> > +        }
> > 
> > +        if ( npfec.gla_valid || cpu_has_svm )
> > +        {
> I can’t immediately tell what this is about, which means it needs a
> comment.
> 
> It may also be (depending on why this is here) that “cpu_has_svm”
> makes this more fragile — if this is really about having a radix
> tree, for instance, then you should probably check for a radix tree.
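> 
> E.g., something like this (only a sketch, assuming the presence of
> the radix tree is the condition this check is really standing in
> for):
> 
>     if ( npfec.gla_valid || p2m->mem_access_settings )
> 
> would keep doing the right thing even if the SVM-specific reasoning
> changes later.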

This is about the different npfec on SVM. The gla is never valid
there, so the fault flag would otherwise never be set.
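
I will add a comment along these lines (a sketch, wording to be
adjusted):

    /*
     * SVM's NPF exit information never reports a valid GLA, so on
     * SVM the fault kind still has to be filled in even when
     * npfec.gla_valid is clear.
     */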
> 
> > 
> >             if ( npfec.kind == npfec_kind_with_gla )
> >                 req->u.mem_access.flags |=
> > MEM_ACCESS_FAULT_WITH_GLA;
> >             else if ( npfec.kind == npfec_kind_in_gpt )
> > diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
> > index b8c5d2e..4330d1f 100644
> > --- a/xen/arch/x86/mm/p2m-pt.c
> > +++ b/xen/arch/x86/mm/p2m-pt.c
> > @@ -68,7 +68,8 @@
> > static unsigned long p2m_type_to_flags(const struct p2m_domain
> > *p2m,
> >                                        p2m_type_t t,
> >                                        mfn_t mfn,
> > -                                       unsigned int level)
> > +                                       unsigned int level,
> > +                                       p2m_access_t access)
> > {
> >     unsigned long flags;
> >     /*
> > @@ -87,23 +88,27 @@ static unsigned long p2m_type_to_flags(const
> > struct p2m_domain *p2m,
> >     case p2m_ram_paged:
> >     case p2m_ram_paging_in:
> >     default:
> > -        return flags | _PAGE_NX_BIT;
> > +        flags |= P2M_BASE_FLAGS | _PAGE_NX_BIT;
> > +        break;
> >     case p2m_grant_map_ro:
> >         return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
> >     case p2m_ioreq_server:
> >         flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
> >         if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
> > -            return flags & ~_PAGE_RW;
> > -        return flags;
> > +            flags &= ~_PAGE_RW;
> > +        break;
> >     case p2m_ram_ro:
> >     case p2m_ram_logdirty:
> >     case p2m_ram_shared:
> > -        return flags | P2M_BASE_FLAGS;
> > +        flags |= P2M_BASE_FLAGS;
> > +        break;
> >     case p2m_ram_rw:
> > -        return flags | P2M_BASE_FLAGS | _PAGE_RW;
> > +        flags |= P2M_BASE_FLAGS | _PAGE_RW;
> > +        break;
> >     case p2m_grant_map_rw:
> >     case p2m_map_foreign:
> > -        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
> > +        flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
> > +        break;
> >     case p2m_mmio_direct:
> >         if ( !rangeset_contains_singleton(mmio_ro_ranges,
> > mfn_x(mfn)) )
> >             flags |= _PAGE_RW;
> > @@ -112,8 +117,37 @@ static unsigned long p2m_type_to_flags(const
> > struct p2m_domain *p2m,
> >             flags |= _PAGE_PWT;
> >             ASSERT(!level);
> >         }
> > -        return flags | P2M_BASE_FLAGS | _PAGE_PCD;
> > +        flags |= P2M_BASE_FLAGS | _PAGE_PCD;
> > +        break;
> > +    }
> I think you want a blank line here.
> 
> > 
> > +    switch ( access )
> > +    {
> > +        case p2m_access_r:
> > +        case p2m_access_w:
> > +            flags |= _PAGE_NX_BIT;
> > +            flags &= ~_PAGE_RW;
> > +            break;
> > +        case p2m_access_rw:
> > +            flags |= _PAGE_NX_BIT;
> > +            break;
> > +        case p2m_access_n:
> > +        case p2m_access_n2rwx:
> > +            flags |= _PAGE_NX_BIT;
> > +            flags &= ~_PAGE_RW;
> > +            break;
> > +        case p2m_access_rx:
> > +        case p2m_access_wx:
> > +        case p2m_access_rx2rw:
> > +            flags &= ~(_PAGE_NX_BIT | _PAGE_RW);
> This looks like a mistake — this will unconditionally enable
> execution, even if the underlying p2m type forbids it.
> ept_p2m_type_to_flags() doesn’t do that.
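> 
> The restriction-only equivalent here would be to leave the NX bit
> alone and only drop write access, e.g. (sketch):
> 
>     case p2m_access_rx:
>     case p2m_access_rx2rw:
>         flags &= ~_PAGE_RW;  /* deny write; don't grant execute */
>         break;
> 
> (p2m_access_wx is awkward anyway, since NPT has no way to deny read
> without also denying write.)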
> 
> > 
> > +            break;
> > +        case p2m_access_x:
> > +            flags &= ~_PAGE_RW;
> > +            break;
> > +        case p2m_access_rwx:
> > +        default:
> > +            break;
> >     }
> I think you want another blank line here too.
> 
> Also, this doesn’t seem to capture the ‘r’ part of the equation —
> shouldn’t p2m_access_n end up with a not-present p2m entry?
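> 
> E.g. (a sketch; I haven't checked how the rest of p2m-pt copes with
> a non-present entry here):
> 
>     case p2m_access_n:
>     case p2m_access_n2rwx:
>         flags &= ~_PAGE_PRESENT;
>         break;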

SVM doesn't explicitly provide a read access bit, so we treat read
and write the same way.

Alex
> 
> > 
> > +    return flags;
> > }
> > 
> > 
> > @@ -174,6 +208,44 @@ static void p2m_add_iommu_flags(l1_pgentry_t
> > *p2m_entry,
> >         l1e_add_flags(*p2m_entry, iommu_nlevel_to_flags(nlevel,
> > flags));
> > }
> > 
> > +static p2m_access_t p2m_get_access(struct p2m_domain *p2m,
> > unsigned long gfn)
> > +{
> > +    void *ptr;
> > +
> > +    if ( !p2m->mem_access_settings )
> > +        return p2m_access_rwx;
> > +
> > +    ptr = radix_tree_lookup(p2m->mem_access_settings, gfn);
> > +    if ( !ptr )
> > +        return p2m_access_rwx;
> > +    else
> > +        return radix_tree_ptr_to_int(ptr);
> > +}
> > +
> > +static void p2m_set_access(struct p2m_domain *p2m, unsigned long
> > gfn,
> > +                                      p2m_access_t a)
> > +{
> > +    int rc;
> > +
> > +    if ( !p2m->mem_access_settings )
> > +        return;
> > +
> > +    if ( p2m_access_rwx == a )
> > +    {
> > +        radix_tree_delete(p2m->mem_access_settings, gfn);
> > +        return;
> > +    }
> > +
> > +    rc = radix_tree_insert(p2m->mem_access_settings, gfn,
> > +                           radix_tree_int_to_ptr(a));
> > +    if ( rc == -EEXIST )
> > +        /* If a setting already exists, change it to the new one.
> > */
> > +        radix_tree_replace_slot(
> > +            radix_tree_lookup_slot(
> > +                p2m->mem_access_settings, gfn),
> > +            radix_tree_int_to_ptr(a));
> > +}
> > +
> > /* Returns: 0 for success, -errno for failure */
> > static int
> > p2m_next_level(struct p2m_domain *p2m, void **table,
> > @@ -201,6 +273,7 @@ p2m_next_level(struct p2m_domain *p2m, void
> > **table,
> >         new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
> > 
> >         p2m_add_iommu_flags(&new_entry, level,
> > IOMMUF_readable|IOMMUF_writable);
> > +        p2m_set_access(p2m, gfn, p2m->default_access);
> >         p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level
> > + 1);
> >     }
> >     else if ( flags & _PAGE_PSE )
> > @@ -249,6 +322,7 @@ p2m_next_level(struct p2m_domain *p2m, void
> > **table,
> >         {
> >             new_entry = l1e_from_pfn(pfn | (i << ((level - 1) *
> > PAGETABLE_ORDER)),
> >                                      flags);
> > +            p2m_set_access(p2m, gfn, p2m->default_access);
> >             p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry,
> > level);
> >         }
> > 
> > @@ -256,6 +330,7 @@ p2m_next_level(struct p2m_domain *p2m, void
> > **table,
> > 
> >         new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
> >         p2m_add_iommu_flags(&new_entry, level,
> > IOMMUF_readable|IOMMUF_writable);
> > +        p2m_set_access(p2m, gfn, p2m->default_access);
> >         p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level
> > + 1);
> >     }
> >     else
> > @@ -420,8 +495,9 @@ static int do_recalc(struct p2m_domain *p2m,
> > unsigned long gfn)
> >         if ( nt != ot )
> >         {
> >             unsigned long mfn = l1e_get_pfn(e);
> > +            p2m_access_t a = p2m_get_access(p2m, gfn);
> >             unsigned long flags = p2m_type_to_flags(p2m, nt,
> > -                                                    _mfn(mfn),
> > level);
> > +                                                    _mfn(mfn),
> > level, a);
> > 
> >             if ( level )
> >             {
> > @@ -569,13 +645,14 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
> > gfn_t gfn_, mfn_t mfn,
> >         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
> >         l3e_content = mfn_valid(mfn) ||
> > p2m_allows_invalid_mfn(p2mt)
> >             ? p2m_l3e_from_pfn(mfn_x(mfn),
> > -                               p2m_type_to_flags(p2m, p2mt, mfn,
> > 2))
> > +                               p2m_type_to_flags(p2m, p2mt, mfn,
> > 2, p2ma))
> >             : l3e_empty();
> >         entry_content.l1 = l3e_content.l3;
> > 
> >         if ( entry_content.l1 != 0 )
> >             p2m_add_iommu_flags(&entry_content, 0,
> > iommu_pte_flags);
> > 
> > +        p2m_set_access(p2m, gfn, p2ma);
> >         p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content,
> > 3);
> >         /* NB: paging_write_p2m_entry() handles tlb flushes
> > properly */
> >     }
> > @@ -608,7 +685,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t
> > gfn_, mfn_t mfn,
> > 
> >         if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
> >             entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
> > -                                         p2m_type_to_flags(p2m,
> > p2mt, mfn, 0));
> > +                                         p2m_type_to_flags(p2m,
> > p2mt, mfn, 0, p2ma));
> >         else
> >             entry_content = l1e_empty();
> > 
> > @@ -630,6 +707,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t
> > gfn_, mfn_t mfn,
> >             p2m->ioreq.entry_count--;
> >         }
> > 
> > +        p2m_set_access(p2m, gfn, p2ma);
> >         /* level 1 entry */
> >         p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content,
> > 1);
> >         /* NB: paging_write_p2m_entry() handles tlb flushes
> > properly */
> > @@ -661,13 +739,14 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
> > gfn_t gfn_, mfn_t mfn,
> >         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
> >         l2e_content = mfn_valid(mfn) ||
> > p2m_allows_invalid_mfn(p2mt)
> >             ? p2m_l2e_from_pfn(mfn_x(mfn),
> > -                               p2m_type_to_flags(p2m, p2mt, mfn,
> > 1))
> > +                               p2m_type_to_flags(p2m, p2mt, mfn,
> > 1, p2ma))
> >             : l2e_empty();
> >         entry_content.l1 = l2e_content.l2;
> > 
> >         if ( entry_content.l1 != 0 )
> >             p2m_add_iommu_flags(&entry_content, 0,
> > iommu_pte_flags);
> > 
> > +        p2m_set_access(p2m, gfn, p2ma);
> >         p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content,
> > 2);
> >         /* NB: paging_write_p2m_entry() handles tlb flushes
> > properly */
> >     }
> > @@ -749,8 +828,7 @@ p2m_pt_get_entry(struct p2m_domain *p2m, gfn_t
> > gfn_,
> >      * XXX Once we start explicitly registering MMIO regions in the
> > p2m 
> >      * XXX we will return p2m_invalid for unmapped gfns */
> >     *t = p2m_mmio_dm;
> > -    /* Not implemented except with EPT */
> > -    *a = p2m_access_rwx; 
> > +    *a = p2m_access_n;
> > 
> >     if ( gfn > p2m->max_mapped_pfn )
> >     {
> > @@ -813,6 +891,7 @@ pod_retry_l3:
> >                        l1_table_offset(addr));
> >             *t = p2m_recalc_type(recalc || _needs_recalc(flags),
> >                                  p2m_flags_to_type(flags), p2m,
> > gfn);
> > +            *a = p2m_get_access(p2m, gfn);
> >             unmap_domain_page(l3e);
> > 
> >             ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
> > @@ -852,6 +931,7 @@ pod_retry_l2:
> >         mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
> >         *t = p2m_recalc_type(recalc || _needs_recalc(flags),
> >                              p2m_flags_to_type(flags), p2m, gfn);
> > +        *a = p2m_get_access(p2m, gfn);
> >         unmap_domain_page(l2e);
> > 
> >         ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
> > @@ -888,6 +968,7 @@ pod_retry_l1:
> >     }
> >     mfn = l1e_get_mfn(*l1e);
> >     *t = p2m_recalc_type(recalc || _needs_recalc(flags), l1t, p2m,
> > gfn);
> > +    *a = p2m_get_access(p2m, gfn);
> >     unmap_domain_page(l1e);
> > 
> >     ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t));
> > diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
> > index c53cab4..12e2d24 100644
> > --- a/xen/arch/x86/mm/p2m.c
> > +++ b/xen/arch/x86/mm/p2m.c
> > @@ -675,6 +675,12 @@ void p2m_teardown(struct p2m_domain *p2m)
> > 
> >     d = p2m->domain;
> > 
> > +    if ( p2m->mem_access_settings )
> > +    {
> > +        radix_tree_destroy(p2m->mem_access_settings, NULL);
> > +        xfree(p2m->mem_access_settings);
> > +    }
> > +
> >     p2m_lock(p2m);
> >     ASSERT(atomic_read(&d->shr_pages) == 0);
> >     p2m->phys_table = pagetable_null();
> > diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c
> > index 3fb6531..18b88a1 100644
> > --- a/xen/arch/x86/monitor.c
> > +++ b/xen/arch/x86/monitor.c
> > @@ -20,10 +20,13 @@
> >  */
> > 
> > #include <asm/monitor.h>
> > +#include <asm/p2m.h>
> > #include <public/vm_event.h>
> > 
> > int arch_monitor_init_domain(struct domain *d)
> > {
> > +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> > +
> >     if ( !d->arch.monitor.msr_bitmap )
> >         d->arch.monitor.msr_bitmap = xzalloc_array(struct
> > monitor_msr_bitmap,
> >                                                    2);
> > @@ -31,6 +34,16 @@ int arch_monitor_init_domain(struct domain *d)
> >     if ( !d->arch.monitor.msr_bitmap )
> >         return -ENOMEM;
> > 
> > +    if ( cpu_has_svm && !p2m->mem_access_settings )
> > +    {
> > +        p2m->mem_access_settings = xmalloc(struct
> > radix_tree_root);
> > +
> > +        if( !p2m->mem_access_settings )
> > +            return -ENOMEM;
> This will leak d->arch.monitor.msr_bitmap.  You need to use the `goto
> out_free:` pattern.
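> 
> I.e., something along these lines (only a sketch; adjust to however
> the rest of the function is structured):
> 
>     int rc = 0;
> 
>     ...
> 
>     if ( cpu_has_svm && !p2m->mem_access_settings )
>     {
>         p2m->mem_access_settings = xmalloc(struct radix_tree_root);
>         if ( !p2m->mem_access_settings )
>         {
>             rc = -ENOMEM;
>             goto out_free;
>         }
>         radix_tree_init(p2m->mem_access_settings);
>     }
> 
>     return 0;
> 
>  out_free:
>     xfree(d->arch.monitor.msr_bitmap);
>     d->arch.monitor.msr_bitmap = NULL;
>     return rc;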
> 
> Everything else looks OK.
> 
>  -George
> 
