[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC PATCH 4/7] common/pv-iommu: Add query, map and unmap ops



On Wed, Feb 10, 2016 at 10:10:32AM +0000, Malcolm Crossley wrote:
> Implement above ops according to PV-IOMMU design draft D.

.. which would be great if they were part of this patch series
and you could just: in docs/misc/blah.

> 
> Currently restricted to hardware domains only due to RFC status.

Um, .. that is not reassuring. Perhaps the design document (which
would be in the patchset) would iterate the rest of TODOs?

> 
> Signed-off-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
> --
> Cc: jbeulich@xxxxxxxx
> Cc: keir@xxxxxxx
> Cc: tim@xxxxxxx
> Cc: andrew.cooper3@xxxxxxxxxx
> Cc: xen-devel@xxxxxxxxxxxxx
> ---
>  xen/common/pv_iommu.c         | 228 
> +++++++++++++++++++++++++++++++++++++++++-
>  xen/include/public/pv-iommu.h |  69 +++++++++++++
>  2 files changed, 296 insertions(+), 1 deletion(-)
>  create mode 100644 xen/include/public/pv-iommu.h
> 
> diff --git a/xen/common/pv_iommu.c b/xen/common/pv_iommu.c
> index 304fccf..91485f3 100644
> --- a/xen/common/pv_iommu.c
> +++ b/xen/common/pv_iommu.c
> @@ -17,13 +17,239 @@
>   * along with this program; If not, see <http://www.gnu.org/licenses/>.
>   */
>  
> +#include <asm/p2m.h>
> +#include <asm/event.h>

Can those be sorted?

>  #include <xen/guest_access.h>
> +#include <public/pv-iommu.h>
>  
>  #define ret_t long
>  
> +static int get_paged_frame(unsigned long gfn, unsigned long *frame,
> +                           struct page_info **page, int readonly,
> +                           struct domain *rd)
> +{
> +    int rc = 0;
> +#if defined(P2M_PAGED_TYPES) || defined(P2M_SHARED_TYPES)
> +    p2m_type_t p2mt;
> +
> +    *page = get_page_from_gfn(rd, gfn, &p2mt,
> +                             (readonly) ? P2M_ALLOC : P2M_UNSHARE);
> +    if ( !(*page) )
> +    {
> +        *frame = INVALID_MFN;
> +        if ( p2m_is_shared(p2mt) )
> +            return -EIO;
> +        if ( p2m_is_paging(p2mt) )
> +        {
> +            p2m_mem_paging_populate(rd, gfn);
> +            return -EIO;
> +        }
> +        return -EIO;
> +    }
> +    *frame = page_to_mfn(*page);
> +#else
> +    *frame = gmfn_to_mfn(rd, gfn);
> +    *page = mfn_valid(*frame) ? mfn_to_page(*frame) : NULL;
> +    if ( (!(page)) || (!get_page*page, rd) )
> +    {
> +        *frame = INVALID_MFN;
> +        *page = NULL;
> +        rc = -EIO;
> +    }
> +#endif
> +
> +    return rc;
> +}
> +
> +int can_use_iommu_check(struct domain *d)
> +{
> +    if ( !iommu_enabled || (!is_hardware_domain(d) && !need_iommu(d)) )
> +        return 0;
> +
> +    if ( is_hardware_domain(d) && iommu_passthrough )
> +        return 0;
> +

What if a PV guests calls this hypercall? Won't it crash on platform_ops 
derefence?

> +    if ( !domain_hvm_iommu(d)->platform_ops->lookup_page )
> +        return 0;
> +
> +    return 1;
> +}
> +
> +void do_iommu_sub_op(struct pv_iommu_op *op)
> +{
> +    struct domain *d = current->domain;
> +    struct domain *rd = NULL;
> +
> +    /* Only order 0 pages supported */

Missing full stop.

> +    if ( IOMMU_get_page_order(op->flags) != 0 )
> +    {
> +        op->status = -ENOSPC;

Not ENOSYS? or EINVAL?

> +        goto finish;
> +    }
> +
> +    switch ( op->subop_id )
> +    {
> +        case IOMMUOP_query_caps:
> +        {
> +            op->flags = 0;
> +            op->status = 0;
> +            if ( can_use_iommu_check(d) )
> +                op->flags |= IOMMU_QUERY_map_cap;

s/|=/=/ ?

> +
> +            break;
> +        }
> +        case IOMMUOP_map_page:
> +        {
> +            unsigned long mfn, tmp;

mfn_t pls.

> +            unsigned int flags = 0;

Not uint16_t ?
[edit: ah, this is for  iommu_map_page. You may want to mention that]
> +            struct page_info *page = NULL;
> +
> +            /* Check if calling domain can create IOMMU mappings */

Missing full stop.

> +            if ( !can_use_iommu_check(d) )
> +            {
> +                op->status = -EPERM;
> +                goto finish;
> +            }
> +
> +            /* Check we are the owner of the page */

Full stop missing.

> +            if ( !is_hardware_domain(d) &&
> +                 ( maddr_get_owner(op->u.map_page.gfn) != d ) )
> +            {
> +                op->status = -EPERM;
> +                goto finish;
> +            }
> +
> +            /* Lookup page struct backing gfn */

Stop full missing.

> +            if ( is_hardware_domain(d) &&
> +                (op->flags & IOMMU_MAP_OP_no_ref_cnt) )
> +            {
> +                mfn = op->u.map_page.gfn;
> +                page = mfn_to_page(mfn);
> +                if (!page)

Wrong syntax. if ( !page )

> +                {
> +                    op->status = -EPERM;
> +                    goto finish;
> +                }
> +            } else if ( get_paged_frame(op->u.map_page.gfn, &mfn, &page, 0, 
> d) )

I think the syntax waants the 'else if' on its own line.

You may want to put a comment around the 0 value.

> +            {
> +                op->status = -EPERM;
> +                goto finish;
> +            }
> +
> +            /* Check for conflict with existing BFN mappings */

Stop missing full.

> +            if ( !iommu_lookup_page(d, op->u.map_page.bfn, &tmp) )
> +            {
> +                if ( !(op->flags & IOMMU_MAP_OP_no_ref_cnt) )

This check is bit weak. The guest (not hardware domain) could set this
flag for fun and the code above would be OK with it.

AS your your check for IOMMU_MAP_OP_no_ref_cnt just tries to do
an get_paged_Frame.

You may just want to have some sanity checking on the flags at the start
of this code?

> +                    put_page(page);
> +                op->status = -EPERM;
> +                goto finish;
> +            }
> +
> +            if ( op->flags & IOMMU_OP_readable )
> +                flags |= IOMMUF_readable;
> +
> +            if ( op->flags & IOMMU_OP_writeable )
> +                flags |= IOMMUF_writable;
> +

What if neither option is set? Should we disallow that?

> +            if ( iommu_map_page(d, op->u.map_page.bfn, mfn, flags) )
> +            {
> +                if ( !(op->flags & IOMMU_MAP_OP_no_ref_cnt) )
> +                    put_page(page);

Again, the non hardware domain could set this.
> +                op->status = -EIO;
> +                goto finish;
> +            }
> +
> +            op->status = 0;
> +            break;
> +        }
> +
> +        case IOMMUOP_unmap_page:
> +        {
> +            unsigned long mfn;

mfn_t.
> +
> +            /* Check if there is a valid BFN mapping for this domain */

Full .. you know what I am going to say.

> +            if ( iommu_lookup_page(d, op->u.unmap_page.bfn, &mfn) )

Wait a minute. Just like that? Don't you want to do some sanity checks?

What if the guest decided for fun to do:
for ( long i = -1; i ; i--)
        hypercall(IOMMUOP_unmap_page, i);

It would naturally fail but we would be taking the hd->Arch.mapping_lock
quite a lot! PErhaps do the check (similar to how you have in the mapping
code?)

Maybe even move the checks in their own function?

> +            {
> +                op->status = -ENOENT;
> +                goto finish;
> +            }
> +
> +            if ( iommu_unmap_page(d, op->u.unmap_page.bfn) )
> +            {
> +                op->status = -EIO;

Why not propogage its error values?
> +                goto finish;
> +            }
> +
> +            op->status = 0;
> +            break;
> +        }
> +
> +        default:
> +            op->status = -ENODEV;
> +            break;
> +    }
> +
> +finish:
> +    if ( rd )
> +        rcu_unlock_domain(rd);
> +
> +    return;
> +}
> +
>  ret_t do_iommu_op(XEN_GUEST_HANDLE_PARAM(void) arg, unsigned int count)

Shouldn't this be changed to be pv_iommu_op_t? instead of void?


>  {
> -    return -ENOSYS;
> +    ret_t ret = 0;
> +    int i;

unsigned int ?
> +    struct pv_iommu_op op;
> +    struct domain *d = current->domain;
> +
> +    if ( !is_hardware_domain(d) )
> +        return -ENOSYS;

-EPERM ?

> +
> +    if ( (int)count < 0 )

Um. If you are doing this - why don't you just make the parameter be 'int'
instead of unsigned int ?

> +        return -EINVAL;
> +
> +    if ( count > 1 )
> +        this_cpu(iommu_dont_flush_iotlb) = 1;

What if the hypercall is  IOMMUOP_query_caps and count is zero (as it
seems it ought to be OK - you just query for the caps). Or what if it is
IOMMUOP_query_caps  and the count is 31415 ?

Perhaps you should do some sanity checks here for some of the
ops?

> +
> +    for ( i = 0; i < count; i++ )

Can you keep the signes of the count and i to be the same pls?

> +    {
> +        if ( i && hypercall_preempt_check() )
> +        {
> +            ret =  i;

Oh, that should definitly be mentioned in the header file.
[edit: I see now that you do hypercall preemption]

> +            goto flush_pages;
> +        }
> +        if ( unlikely(__copy_from_guest_offset(&op, arg, i, 1)) )
> +        {
> +            ret = -EFAULT;
> +            goto flush_pages;
> +        }
> +        do_iommu_sub_op(&op);
> +        if ( unlikely(__copy_to_guest_offset(arg, i, &op, 1)) )
> +        {
> +            ret = -EFAULT;
> +            goto flush_pages;
> +        }
> +    }
> +
> +flush_pages:
> +    if ( ret > 0 )
> +    {
> +        XEN_GUEST_HANDLE_PARAM(pv_iommu_op_t) op =
> +            guest_handle_cast(arg, pv_iommu_op_t);
> +        ASSERT(ret < count);
> +        guest_handle_add_offset(op, i);
> +        arg = guest_handle_cast(op, void);
> +        ret = hypercall_create_continuation(__HYPERVISOR_iommu_op,
> +                                           "hi", arg, count - i);
> +    }
> +    if ( count > 1 )
> +    {
> +        this_cpu(iommu_dont_flush_iotlb) = 0;
> +        if ( i )
> +            iommu_iotlb_flush_all(d);
> +    }
> +    return ret;
>  }
>  
>  /*
> diff --git a/xen/include/public/pv-iommu.h b/xen/include/public/pv-iommu.h
> new file mode 100644
> index 0000000..0f19f96
> --- /dev/null
> +++ b/xen/include/public/pv-iommu.h
> @@ -0,0 +1,69 @@
> +/*
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
> and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
> THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifndef __XEN_PUBLIC_PV_IOMMU_H__
> +#define __XEN_PUBLIC_PV_IOMMU_H__
> +
> +#include "xen.h"
> +
> +#define IOMMUOP_query_caps            1
> +#define IOMMUOP_map_page              2
> +#define IOMMUOP_unmap_page            3

Also you are missing explanation of what those hypercalls do. You can 
copy-n-paste
from the design doc.

Perhaps IOMMU_OP_... ?
> +
> +struct pv_iommu_op {
> +    uint16_t subop_id;

Why subop? Isn't it 'op'? To make with IOMMUOP.. 

> +
> +#define IOMMU_page_order (0xf1 << 10)
> +#define IOMMU_get_page_order(flags) ((flags & IOMMU_page_order) >> 10)
> +#define IOMMU_QUERY_map_cap (1 << 0)
> +#define IOMMU_QUERY_map_all_mfns (1 << 1)
> +#define IOMMU_OP_readable (1 << 0)
> +#define IOMMU_OP_writeable (1 << 1)
> +#define IOMMU_MAP_OP_no_ref_cnt (1 << 2)

Something is off with your tabs. Could you make these be on the same column?

And since these are flags perhaps IOMMU_F_.. ?

Also the hypercall accepts two arguments. So what is the 'unsigned int size'
for ?

That should be mentioned here.

> +    uint16_t flags;
> +    int32_t status;
> +
> +    union {
> +        struct {
> +            uint64_t bfn;
> +            uint64_t gfn;
> +        } map_page;
> +
> +        struct {
> +            uint64_t bfn;
> +        } unmap_page;
> +    } u;
> +};
> +
> +
> +typedef struct pv_iommu_op pv_iommu_op_t;
> +DEFINE_XEN_GUEST_HANDLE(pv_iommu_op_t);
> +
> +#endif
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> -- 
> 1.7.12.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.