[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH V3 28/29] x86/vvtd: Add queued invalidation (QI) support



On Thu, Sep 21, 2017 at 11:02:09PM -0400, Lan Tianyu wrote:
> From: Chao Gao <chao.gao@xxxxxxxxx>
> 
> Queued Invalidation Interface is an expanded invalidation interface with
> extended capabilities. Hardware implementations report support for queued
> invalidation interface through the Extended Capability Register. The queued
> invalidation interface uses an Invalidation Queue (IQ), which is a circular
> buffer in system memory. Software submits commands by writing Invalidation
> Descriptors to the IQ.
> 
> In this patch, a new function viommu_process_iq() is used for emulating how
> hardware handles invalidation requests through QI.
> 
> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
> ---
>  xen/drivers/passthrough/vtd/iommu.h |  19 ++-
>  xen/drivers/passthrough/vtd/vvtd.c  | 232 
> ++++++++++++++++++++++++++++++++++++
>  2 files changed, 250 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/drivers/passthrough/vtd/iommu.h 
> b/xen/drivers/passthrough/vtd/iommu.h
> index c69cd21..c2b83f1 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -177,6 +177,21 @@
>  #define DMA_IRTA_S(val)         (val & 0xf)
>  #define DMA_IRTA_SIZE(val)      (1UL << (DMA_IRTA_S(val) + 1))
>  
> +/* IQA_REG */
> +#define DMA_IQA_ADDR(val)       (val & ~0xfffULL)
> +#define DMA_IQA_QS(val)         (val & 0x7)
> +#define DMA_IQA_RSVD            0xff8ULL
> +
> +/* IECTL_REG */
> +#define DMA_IECTL_IM_SHIFT 31
> +#define DMA_IECTL_IM            (1 << DMA_IECTL_IM_SHIFT)

Isn't this undefined behavior? It should be 1u.

You should consider using u for all the defines added.

> +#define DMA_IECTL_IP_SHIFT 30
> +#define DMA_IECTL_IP            (1 << DMA_IECTL_IP_SHIFT)
> +
> +/* ICS_REG */
> +#define DMA_ICS_IWC_SHIFT       0
> +#define DMA_ICS_IWC             (1 << DMA_ICS_IWC_SHIFT)
> +
>  /* PMEN_REG */
>  #define DMA_PMEN_EPM    (((u32)1) << 31)
>  #define DMA_PMEN_PRS    (((u32)1) << 0)
> @@ -211,7 +226,8 @@
>  #define DMA_FSTS_PPF (1U << DMA_FSTS_PPF_SHIFT)
>  #define DMA_FSTS_AFO (1U << 2)
>  #define DMA_FSTS_APF (1U << 3)
> -#define DMA_FSTS_IQE (1U << 4)
> +#define DMA_FSTS_IQE_SHIFT 4
> +#define DMA_FSTS_IQE (1U << DMA_FSTS_IQE_SHIFT)
>  #define DMA_FSTS_ICE (1U << 5)
>  #define DMA_FSTS_ITE (1U << 6)
>  #define DMA_FSTS_PRO_SHIFT 7
> @@ -562,6 +578,7 @@ struct qinval_entry {
>  
>  /* Queue invalidation head/tail shift */
>  #define QINVAL_INDEX_SHIFT 4
> +#define QINVAL_INDEX_MASK  0x7fff0ULL
>  
>  #define qinval_present(v) ((v).lo & 1)
>  #define qinval_fault_disable(v) (((v).lo >> 1) & 1)
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c 
> b/xen/drivers/passthrough/vtd/vvtd.c
> index 55f7a46..668d0c9 100644
> --- a/xen/drivers/passthrough/vtd/vvtd.c
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -28,6 +28,7 @@
>  #include <asm/current.h>
>  #include <asm/event.h>
>  #include <asm/hvm/domain.h>
> +#include <asm/hvm/support.h>
>  #include <asm/io_apic.h>
>  #include <asm/page.h>
>  #include <asm/p2m.h>
> @@ -419,6 +420,177 @@ static int vvtd_record_fault(struct vvtd *vvtd,
>      return X86EMUL_OKAY;
>  }
>  
> +/*
> + * Process a invalidation descriptor. Currently, only two types descriptors,
> + * Interrupt Entry Cache Invalidation Descritor and Invalidation Wait
> + * Descriptor are handled.
> + * @vvtd: the virtual vtd instance
> + * @i: the index of the invalidation descriptor to be processed
> + *
> + * If success return 0, or return non-zero when failure.
> + */
> +static int process_iqe(struct vvtd *vvtd, int i)

unsigned int.

> +{
> +    uint64_t iqa;
> +    struct qinval_entry *qinval_page;
> +    int ret = 0;
> +
> +    iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
> +    qinval_page = map_guest_page(vvtd->domain, 
> DMA_IQA_ADDR(iqa)>>PAGE_SHIFT);

PFN_DOWN instead of open coding the shift. Both can be initialized
at declaration. Also AFAICT iqa is only used once, so the local
variable is not needed.

> +    if ( IS_ERR(qinval_page) )
> +    {
> +        gdprintk(XENLOG_ERR, "Can't map guest IRT (rc %ld)",
> +                 PTR_ERR(qinval_page));
> +        return PTR_ERR(qinval_page);
> +    }
> +
> +    switch ( qinval_page[i].q.inv_wait_dsc.lo.type )
> +    {
> +    case TYPE_INVAL_WAIT:
> +        if ( qinval_page[i].q.inv_wait_dsc.lo.sw )
> +        {
> +            uint32_t data = qinval_page[i].q.inv_wait_dsc.lo.sdata;
> +            uint64_t addr = (qinval_page[i].q.inv_wait_dsc.hi.saddr << 2);

Unneeded parentheses.

> +
> +            ret = hvm_copy_to_guest_phys(addr, &data, sizeof(data), current);
> +            if ( ret )
> +                vvtd_info("Failed to write status address");

Don't you need to return or do something here? (like raise some kind
of error?)

> +        }
> +
> +        /*
> +         * The following code generates an invalidation completion event
> +         * indicating the invalidation wait descriptor completion. Note that
> +         * the following code fragment is not tested properly.
> +         */
> +        if ( qinval_page[i].q.inv_wait_dsc.lo.iflag )
> +        {
> +            uint32_t ie_data, ie_addr;

Missing newline, but...

> +            if ( !vvtd_test_and_set_bit(vvtd, DMAR_ICS_REG, 
> DMA_ICS_IWC_SHIFT) )
> +            {
> +                vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
> +                if ( !vvtd_test_bit(vvtd, DMAR_IECTL_REG, 
> DMA_IECTL_IM_SHIFT) )
> +                {
> +                    ie_data = vvtd_get_reg(vvtd, DMAR_IEDATA_REG);
> +                    ie_addr = vvtd_get_reg(vvtd, DMAR_IEADDR_REG);
> +                    vvtd_generate_interrupt(vvtd, ie_addr, ie_data);

...you don't seem two need the two local variables. They are used only
once.

> +                    vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
> +                }
> +            }
> +        }
> +        break;
> +
> +    case TYPE_INVAL_IEC:
> +        /*
> +         * Currently, no cache is preserved in hypervisor. Only need to 
> update
> +         * pIRTEs which are modified in binding process.
> +         */
> +        break;
> +
> +    default:
> +        goto error;

There's no reason to use a label that's only used for the default
case. Simply place the code in the error label here.

> +    }
> +
> +    unmap_guest_page((void*)qinval_page);
> +    return ret;
> +
> + error:
> +    unmap_guest_page((void*)qinval_page);
> +    gdprintk(XENLOG_ERR, "Internal error in Queue Invalidation.\n");
> +    domain_crash(vvtd->domain);

Do you really need to crash the domain in such case?

> +    return ret;
> +}
> +
> +/*
> + * Invalidate all the descriptors in Invalidation Queue.
> + */
> +static void vvtd_process_iq(struct vvtd *vvtd)
> +{
> +    uint64_t iqh, iqt, iqa, max_entry, i;
> +    int err = 0;
> +
> +    /*
> +     * No new descriptor is fetched from the Invalidation Queue until
> +     * software clears the IQE field in the Fault Status Register
> +     */
> +    if ( vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_SHIFT) )
> +        return;
> +
> +    iqh = vvtd_get_reg_quad(vvtd, DMAR_IQH_REG);
> +    iqt = vvtd_get_reg_quad(vvtd, DMAR_IQT_REG);
> +    iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
> +
> +    max_entry = 1 << (QINVAL_ENTRY_ORDER + DMA_IQA_QS(iqa));
> +    iqh = MASK_EXTR(iqh, QINVAL_INDEX_MASK);
> +    iqt = MASK_EXTR(iqt, QINVAL_INDEX_MASK);

This should be done above, when they are initialized.

> +
> +    ASSERT(iqt < max_entry);
> +    if ( iqh == iqt )
> +        return;
> +
> +    for ( i = iqh; i != iqt; i = (i + 1) % max_entry )
> +    {
> +        err = process_iqe(vvtd, i);

process_iqe takes an int parameter, and here you are feeding it a
uint64_t.

> +        if ( err )
> +            break;
> +    }
> +    vvtd_set_reg_quad(vvtd, DMAR_IQH_REG, i << QINVAL_INDEX_SHIFT);
> +
> +    /*
> +     * When IQE set, IQH references the desriptor associated with the error.
> +     */
> +    if ( err )
> +        vvtd_report_non_recoverable_fault(vvtd, DMA_FSTS_IQE_SHIFT);
> +}
> +
> +static int vvtd_write_iqt(struct vvtd *vvtd, unsigned long val)

Not sure there's much point in making this function return int, when
all the return values are X86EMUL_OKAY.

Sionce val here is a register AFAICT, I would rather prefer that you
use an explicit type size (uint64_t or uint32_t as fits).

> +{
> +    uint64_t max_entry, iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
> +
> +    if ( val & ~QINVAL_INDEX_MASK )
> +    {
> +        vvtd_info("Attempt to set reserved bits in Invalidation Queue Tail");
> +        return X86EMUL_OKAY;
> +    }
> +
> +    max_entry = 1 << (QINVAL_ENTRY_ORDER + DMA_IQA_QS(iqa));

1ull please.

> +    if ( MASK_EXTR(val, QINVAL_INDEX_MASK) >= max_entry )
> +    {
> +        vvtd_info("IQT: Value %lx exceeded supported max index.", val);
> +        return X86EMUL_OKAY;
> +    }
> +
> +    vvtd_set_reg_quad(vvtd, DMAR_IQT_REG, val);
> +    vvtd_process_iq(vvtd);
> +
> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write_iqa(struct vvtd *vvtd, unsigned long val)

Same here, it seems like this function should return void, because the
current return value is meaningless, and same comment about 'val'
being uintXX_t.

> +{
> +    uint32_t cap = vvtd_get_reg(vvtd, DMAR_CAP_REG);
> +    unsigned int guest_max_addr_width = cap_mgaw(cap);
> +
> +    if ( val & (~((1ULL << guest_max_addr_width) - 1) | DMA_IQA_RSVD) )
> +    {
> +        vvtd_info("Attempt to set reserved bits in Invalidation Queue 
> Address");
> +        return X86EMUL_OKAY;
> +    }
> +
> +    vvtd_set_reg_quad(vvtd, DMAR_IQA_REG, val);
> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write_ics(struct vvtd *vvtd, uint32_t val)
> +{
> +    if ( val & DMA_ICS_IWC )
> +    {
> +        vvtd_clear_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_SHIFT);
> +        /*When IWC field is cleared, the IP field needs to be cleared */
             ^ missing space.

> +        vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
> +    }
> +    return X86EMUL_OKAY;

This function wants to be void.

> +}
> +
>  static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
>  {
>      /* Writing a 1 means clear fault */
> @@ -430,6 +602,30 @@ static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t 
> val)
>      return X86EMUL_OKAY;
>  }
>  
> +static int vvtd_write_iectl(struct vvtd *vvtd, uint32_t val)

void please.

> +{
> +    /*
> +     * Only DMA_IECTL_IM bit is writable. Generate pending event when unmask.
> +     */

Single line comments use /* ... */

> +    if ( !(val & DMA_IECTL_IM) )
> +    {
> +        /* Clear IM and clear IP */
> +        vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
> +        if ( vvtd_test_and_clear_bit(vvtd, DMAR_IECTL_REG, 
> DMA_IECTL_IP_SHIFT) )
> +        {
> +            uint32_t ie_data, ie_addr;
> +
> +            ie_data = vvtd_get_reg(vvtd, DMAR_IEDATA_REG);
> +            ie_addr = vvtd_get_reg(vvtd, DMAR_IEADDR_REG);
> +            vvtd_generate_interrupt(vvtd, ie_addr, ie_data);

No need for ie_data and ie_addr.

> +        }
> +    }
> +    else
> +        vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
> +
> +    return X86EMUL_OKAY;
> +}
> +
>  static int vvtd_write_fectl(struct vvtd *vvtd, uint32_t val)
>  {
>      /*
> @@ -476,6 +672,10 @@ static int vvtd_write_fsts(struct vvtd *vvtd, uint32_t 
> val)
>      if ( !((vvtd_get_reg(vvtd, DMAR_FSTS_REG) & DMA_FSTS_FAULTS)) )
>          vvtd_clear_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IP_SHIFT);
>  
> +    /* Continue to deal invalidation when IQE is clear */
> +    if ( !vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_SHIFT) )
> +        vvtd_process_iq(vvtd);
> +
>      return X86EMUL_OKAY;
>  }
>  
> @@ -611,6 +811,32 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
>          case DMAR_FECTL_REG:
>              return vvtd_write_fectl(vvtd, val);
>  
> +        case DMAR_IECTL_REG:
> +            return vvtd_write_iectl(vvtd, val);
> +
> +        case DMAR_ICS_REG:
> +            return vvtd_write_ics(vvtd, val);
> +
> +        case DMAR_IQT_REG:
> +            return vvtd_write_iqt(vvtd, (uint32_t)val);
> +
> +        case DMAR_IQA_REG:
> +        {
> +            uint32_t iqa_hi;
> +
> +            iqa_hi = vvtd_get_reg(vvtd, DMAR_IQA_REG_HI);

Initialization at declaration time, but since it's used only once, I
would rather prefer that you don't use a local variable at all.

> +            return vvtd_write_iqa(vvtd,
> +                                 (uint32_t)val | ((uint64_t)iqa_hi << 32));
> +        }
> +
> +        case DMAR_IQA_REG_HI:
> +        {
> +            uint32_t iqa_lo;
> +
> +            iqa_lo = vvtd_get_reg(vvtd, DMAR_IQA_REG);
> +            return vvtd_write_iqa(vvtd, (val << 32) | iqa_lo);

Same comment as above regarding iqa_lo.

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.