Re: [Xen-devel] [PATCH v4 16/28] x86/vvtd: Add queued invalidation (QI) support
On Fri, Nov 17, 2017 at 02:22:23PM +0800, Chao Gao wrote:
> Queued Invalidation Interface is an expanded invalidation interface with
> extended capabilities. Hardware implementations report support for queued
> invalidation interface through the Extended Capability Register. The queued
> invalidation interface uses an Invalidation Queue (IQ), which is a circular
> buffer in system memory. Software submits commands by writing Invalidation
> Descriptors to the IQ.
>
> In this patch, a new function viommu_process_iq() is used for emulating how
> hardware handles invalidation requests through QI.
You should mention that QI is mandatory in order to support interrupt
remapping.
I was about to ask whether QI could be deferred to a later stage, but
AFAICT this is not an option.
> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
>
> ---
> v4:
> - Introduce a lock to protect invalidation related registers.
> ---
> xen/drivers/passthrough/vtd/iommu.h | 24 +++-
> xen/drivers/passthrough/vtd/vvtd.c  | 271 +++++++++++++++++++++++++++++++++++-
> 2 files changed, 293 insertions(+), 2 deletions(-)
>
> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
> index b71dab8..de9188b 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -47,7 +47,12 @@
> #define DMAR_IQH_REG 0x80 /* invalidation queue head */
> #define DMAR_IQT_REG 0x88 /* invalidation queue tail */
> #define DMAR_IQA_REG 0x90 /* invalidation queue addr */
> +#define DMAR_IQUA_REG 0x94 /* invalidation queue upper addr */
> +#define DMAR_ICS_REG 0x9c /* invalidation completion status */
> #define DMAR_IECTL_REG 0xa0 /* invalidation event control register */
> +#define DMAR_IEDATA_REG 0xa4 /* invalidation event data register */
> +#define DMAR_IEADDR_REG 0xa8 /* invalidation event address register */
> +#define DMAR_IEUADDR_REG 0xac /* upper address register */
> #define DMAR_IRTA_REG 0xb8 /* base address of intr remap table */
> #define DMAR_IRTUA_REG 0xbc /* upper address of intr remap table */
>
> @@ -175,6 +180,21 @@
> #define DMA_IRTA_S(val) (val & 0xf)
> #define DMA_IRTA_SIZE(val) (1UL << (DMA_IRTA_S(val) + 1))
>
> +/* IQA_REG */
> +#define DMA_IQA_ADDR(val) (val & ~0xfffULL)
> +#define DMA_IQA_QS(val) (val & 0x7)
> +#define DMA_IQA_RSVD 0xff8ULL
> +
> +/* IECTL_REG */
> +#define DMA_IECTL_IM_SHIFT 31
> +#define DMA_IECTL_IM (1U << DMA_IECTL_IM_SHIFT)
> +#define DMA_IECTL_IP_SHIFT 30
> +#define DMA_IECTL_IP (1U << DMA_IECTL_IP_SHIFT)
> +
> +/* ICS_REG */
> +#define DMA_ICS_IWC_SHIFT 0
> +#define DMA_ICS_IWC (1U << DMA_ICS_IWC_SHIFT)
> +
> /* PMEN_REG */
> #define DMA_PMEN_EPM (((u32)1) << 31)
> #define DMA_PMEN_PRS (((u32)1) << 0)
> @@ -205,13 +225,14 @@
> /* FSTS_REG */
> #define DMA_FSTS_PFO_SHIFT 0
> #define DMA_FSTS_PPF_SHIFT 1
> +#define DMA_FSTS_IQE_SHIFT 4
> #define DMA_FSTS_PRO_SHIFT 7
>
> #define DMA_FSTS_PFO ((uint32_t)1 << DMA_FSTS_PFO_SHIFT)
> #define DMA_FSTS_PPF ((uint32_t)1 << DMA_FSTS_PPF_SHIFT)
> #define DMA_FSTS_AFO ((uint32_t)1 << 2)
> #define DMA_FSTS_APF ((uint32_t)1 << 3)
> -#define DMA_FSTS_IQE ((uint32_t)1 << 4)
> +#define DMA_FSTS_IQE ((uint32_t)1 << DMA_FSTS_IQE_SHIFT)
> #define DMA_FSTS_ICE ((uint32_t)1 << 5)
> #define DMA_FSTS_ITE ((uint32_t)1 << 6)
> #define DMA_FSTS_PRO ((uint32_t)1 << DMA_FSTS_PRO_SHIFT)
> @@ -555,6 +576,7 @@ struct qinval_entry {
>
> /* Queue invalidation head/tail shift */
> #define QINVAL_INDEX_SHIFT 4
> +#define QINVAL_INDEX_MASK 0x7fff0ULL
>
> #define qinval_present(v) ((v).lo & 1)
> #define qinval_fault_disable(v) (((v).lo >> 1) & 1)
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
> index a2fa64a..81170ec 100644
> --- a/xen/drivers/passthrough/vtd/vvtd.c
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -27,6 +27,7 @@
> #include <asm/event.h>
> #include <asm/io_apic.h>
> #include <asm/hvm/domain.h>
> +#include <asm/hvm/support.h>
> #include <asm/p2m.h>
>
> #include "iommu.h"
> @@ -68,6 +69,9 @@ struct vvtd {
>
> struct hvm_hw_vvtd hw;
> void *irt_base;
> + void *inv_queue_base;
Why not declare this as:
struct qinval_entry *
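i.e. something along these lines (just a sketch):

    /* Typed pointer avoids the cast when indexing the queue below. */
    struct qinval_entry *inv_queue_base;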
> + /* This lock protects invalidation related registers */
> + spinlock_t ie_lock;
As noted in another patch, I think the first approach should be to use
a single lock that serializes access to the whole vIOMMU register
space. Later we can see about more fine grained locking.
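i.e. a single (sketch, the name is of course up to you):

    /* Serializes all accesses to the emulated register block. */
    spinlock_t regs_lock;

in struct vvtd, taken at the top of the MMIO read/write handlers, would
be enough for a first implementation.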
> };
>
> /* Setting viommu_verbose enables debugging messages of vIOMMU */
> @@ -284,6 +288,12 @@ static void vvtd_notify_fault(const struct vvtd *vvtd)
> vvtd_get_reg(vvtd, DMAR_FEDATA_REG));
> }
>
> +static void vvtd_notify_inv_completion(const struct vvtd *vvtd)
> +{
> + vvtd_generate_interrupt(vvtd, vvtd_get_reg_quad(vvtd, DMAR_IEADDR_REG),
> + vvtd_get_reg(vvtd, DMAR_IEDATA_REG));
> +}
> +
> /* Computing the IRTE index for a given interrupt request. When success, return
> * 0 and set index to reference the corresponding IRTE. Otherwise, return < 0,
> * i.e. -1 when the irq request isn't a remapping format.
> @@ -478,6 +488,189 @@ static int vvtd_record_fault(struct vvtd *vvtd,
> return X86EMUL_OKAY;
> }
>
> +/*
> + * Process an invalidation descriptor. Currently, only two types of
> + * descriptors, Interrupt Entry Cache Invalidation Descriptor and
> + * Invalidation Wait Descriptor, are handled.
> + * @vvtd: the virtual vtd instance
> + * @i: the index of the invalidation descriptor to be processed
> + *
> + * Return 0 on success, or non-zero on failure.
> + */
> +static int process_iqe(struct vvtd *vvtd, uint32_t i)
> +{
> + struct qinval_entry qinval;
> + int ret = 0;
> +
> + if ( !vvtd->inv_queue_base )
> + {
> + gdprintk(XENLOG_ERR, "Invalidation queue base isn't set\n");
> + return -1;
If you just return -1 or 0 please use bool instead. Or return proper
error codes.
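i.e. (a sketch, using the standard errno values):

    if ( !vvtd->inv_queue_base )
    {
        gdprintk(XENLOG_ERR, "Invalidation queue base isn't set\n");
        return -ENOENT;
    }

and -EINVAL (or similar) for the unsupported descriptor type below.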
> + }
> + qinval = ((struct qinval_entry *)vvtd->inv_queue_base)[i];
See my comment above regarding how inv_queue_base is declared, I'm not
sure why the copy is needed here.
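With the typed declaration suggested above this could simply be (sketch):

    const struct qinval_entry *qinval = &vvtd->inv_queue_base[i];

    switch ( qinval->q.inv_wait_dsc.lo.type )
    ...

avoiding the 16 byte structure copy.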
> +
> + switch ( qinval.q.inv_wait_dsc.lo.type )
> + {
> + case TYPE_INVAL_WAIT:
> + if ( qinval.q.inv_wait_dsc.lo.sw )
> + {
> + uint32_t data = qinval.q.inv_wait_dsc.lo.sdata;
> + uint64_t addr = qinval.q.inv_wait_dsc.hi.saddr << 2;
> +
> + ret = hvm_copy_to_guest_phys(addr, &data, sizeof(data), current);
> + if ( ret )
> + vvtd_info("Failed to write status address\n");
> + }
> +
> + /*
> + * The following code generates an invalidation completion event
> + * indicating the invalidation wait descriptor completion. Note that
> + * the following code fragment is not tested properly.
> + */
> + if ( qinval.q.inv_wait_dsc.lo.iflag )
> + {
> + if ( !vvtd_test_and_set_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_SHIFT) )
> + {
> + vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
> + if ( !vvtd_test_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT) )
> + {
> + vvtd_notify_inv_completion(vvtd);
> + vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
> + }
> + }
> + }
> + break;
> +
> + case TYPE_INVAL_IEC:
> + /* No cache is preserved in vvtd, nothing is needed to be flushed */
> + break;
> +
> + default:
> + vvtd_debug("d%d: Invalidation type (%x) isn't supported\n",
> + vvtd->domain->domain_id, qinval.q.inv_wait_dsc.lo.type);
> + return -1;
> + }
> +
> + return ret;
> +}
> +
> +/*
> + * Process all the pending descriptors in the Invalidation Queue.
> + */
> +static void vvtd_process_iq(struct vvtd *vvtd)
> +{
> + uint32_t max_entry, i, iqh, iqt;
> + int err = 0;
> +
> + /* Trylock avoids more than 1 caller dealing with invalidation requests */
> + if ( !spin_trylock(&vvtd->ie_lock) )
Uh, is this correct? You are returning without the queue being
invalidated AFAICT.
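If the intention is just to serialize callers, a plain (sketch):

    spin_lock(&vvtd->ie_lock);

would guarantee the queue always gets processed; with trylock the caller
that loses the race simply returns, and AFAICT nothing rechecks the
queue on its behalf once the lock holder has already read IQT.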
> + return;
> +
> + iqh = MASK_EXTR(vvtd_get_reg_quad(vvtd, DMAR_IQH_REG), QINVAL_INDEX_MASK);
> + iqt = MASK_EXTR(vvtd_get_reg_quad(vvtd, DMAR_IQT_REG), QINVAL_INDEX_MASK);
> + /*
> + * No new descriptor is fetched from the Invalidation Queue until
> + * software clears the IQE field in the Fault Status Register
> + */
> + if ( vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_SHIFT) )
> + {
> + spin_unlock(&vvtd->ie_lock);
> + return;
> + }
> +
> + max_entry = 1 << (QINVAL_ENTRY_ORDER +
> + DMA_IQA_QS(vvtd_get_reg_quad(vvtd, DMAR_IQA_REG)));
> +
> + ASSERT(iqt < max_entry);
Is it possible for the user to write a valid value to DMAR_IQT_REG and
then change DMAR_IQA_REG in order to make the above ASSERT trigger?
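IMO a guest-triggerable condition shouldn't be an ASSERT, this should
rather be reported back as an invalidation queue error, e.g. (sketch,
reusing the error path you already have at the bottom of the function):

    if ( iqt >= max_entry )
    {
        spin_unlock(&vvtd->ie_lock);
        spin_lock(&vvtd->fe_lock);
        vvtd_report_non_recoverable_fault(vvtd, DMA_FSTS_IQE_SHIFT);
        spin_unlock(&vvtd->fe_lock);
        return;
    }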
> + if ( iqh == iqt )
> + {
> + spin_unlock(&vvtd->ie_lock);
> + return;
> + }
> +
> + for ( i = iqh; i != iqt; i = (i + 1) % max_entry )
> + {
> + err = process_iqe(vvtd, i);
> + if ( err )
> + break;
> + }
> +
> + /*
> + * Set IQH before checking for an error, because IQH should reference
> + * the descriptor associated with the error when an error is seen
> + * by the guest.
> + */
> + vvtd_set_reg_quad(vvtd, DMAR_IQH_REG, i << QINVAL_INDEX_SHIFT);
> +
> + spin_unlock(&vvtd->ie_lock);
> + if ( err )
> + {
> + spin_lock(&vvtd->fe_lock);
> + vvtd_report_non_recoverable_fault(vvtd, DMA_FSTS_IQE_SHIFT);
> + spin_unlock(&vvtd->fe_lock);
> + }
> +}
> +
> +static void vvtd_write_iqt(struct vvtd *vvtd, uint32_t val)
> +{
> + uint32_t max_entry;
> +
> + if ( val & ~QINVAL_INDEX_MASK )
> + {
> + vvtd_info("attempts to set reserved bits in IQT\n");
> + return;
> + }
> +
> + max_entry = 1U << (QINVAL_ENTRY_ORDER +
> + DMA_IQA_QS(vvtd_get_reg_quad(vvtd, DMAR_IQA_REG)));
> + if ( MASK_EXTR(val, QINVAL_INDEX_MASK) >= max_entry )
> + {
> + vvtd_info("IQT: Value %x exceeded supported max index.", val);
> + return;
> + }
> +
> + vvtd_set_reg(vvtd, DMAR_IQT_REG, val);
> +}
> +
> +static void vvtd_write_iqa(struct vvtd *vvtd, uint32_t val, bool high)
> +{
> + uint64_t cap = vvtd_get_reg_quad(vvtd, DMAR_CAP_REG);
> + uint64_t old = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
> + uint64_t new;
> +
> + if ( high )
> + new = ((uint64_t)val << 32) | (old & 0xffffffff);
> + else
> + new = ((old >> 32) << 32) | val;
You can also use old & ~0xffffffffUL
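i.e. (equivalent, and avoids the double shift):

    new = (old & ~0xffffffffUL) | val;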
> +
> + if ( new & (~((1ULL << cap_mgaw(cap)) - 1) | DMA_IQA_RSVD) )
> + {
> + vvtd_info("Attempt to set reserved bits in IQA\n");
> + return;
> + }
> +
> + vvtd_set_reg_quad(vvtd, DMAR_IQA_REG, new);
> + if ( high && !vvtd->inv_queue_base )
> + vvtd->inv_queue_base = map_guest_pages(vvtd->domain,
> + PFN_DOWN(DMA_IQA_ADDR(new)),
> + 1 << DMA_IQA_QS(new));
Don't you need to pick a reference to this page(s)?
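i.e. I would expect the mapping helper to grab one per page, something
like (sketch, assuming map_guest_pages doesn't do this internally
already):

    p2m_type_t t;
    struct page_info *pg = get_page_from_gfn(d, gfn, &t, P2M_ALLOC);

    if ( !pg || !p2m_is_ram(t) )
        /* fail the mapping */;

and the matching put_page() on unmap, so the guest cannot free the
memory behind the vIOMMU's back.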
> + else if ( !high && vvtd->inv_queue_base )
I'm not sure I follow the logic with high here.
> + {
> + unmap_guest_pages(vvtd->inv_queue_base, 1 << DMA_IQA_QS(old));
> + vvtd->inv_queue_base = NULL;
> + }
> +}
> +
> +static void vvtd_write_ics(struct vvtd *vvtd, uint32_t val)
> +{
> + if ( val & DMA_ICS_IWC )
> + {
> + vvtd_clear_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_SHIFT);
> + /* When IWC field is cleared, the IP field needs to be cleared */
> + vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
> + }
> +}
> +
> static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
> {
> /* Writing a 1 means clear fault */
> @@ -489,6 +682,20 @@ static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t
> val)
> return X86EMUL_OKAY;
> }
>
> +static void vvtd_write_iectl(struct vvtd *vvtd, uint32_t val)
> +{
> + /* Only the DMA_IECTL_IM bit is writable. Generate the pending event on unmask */
> + if ( !(val & DMA_IECTL_IM) )
> + {
> + /* Clear IM and clear IP */
> + vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
> + if ( vvtd_test_and_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT) )
> + vvtd_notify_inv_completion(vvtd);
> + }
> + else
> + vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
> +}
> +
> static void vvtd_write_fectl(struct vvtd *vvtd, uint32_t val)
> {
> /*
> @@ -681,6 +888,48 @@ static void vvtd_write_fault_regs(struct vvtd *vvtd, unsigned long val,
> spin_unlock(&vvtd->fe_lock);
> }
>
> +static void vvtd_write_invalidation_regs(struct vvtd *vvtd, unsigned long val,
> + unsigned int offset, unsigned int len)
> +{
> + spin_lock(&vvtd->ie_lock);
> + for ( ; len ; len -= 4, offset += 4, val = val >> 32)
Same comment as in the previous patch, I don't really like the for
loop, but I guess 64bit accesses must be allowed to this group of
registers?
> + {
> + switch ( offset )
> + {
> + case DMAR_IECTL_REG:
> + vvtd_write_iectl(vvtd, val);
> + break;
> +
> + case DMAR_ICS_REG:
> + vvtd_write_ics(vvtd, val);
> + break;
> +
> + case DMAR_IQT_REG:
> + vvtd_write_iqt(vvtd, val);
> + break;
> +
> + case DMAR_IQA_REG:
> + vvtd_write_iqa(vvtd, val, 0);
> + break;
> +
> + case DMAR_IQUA_REG:
> + vvtd_write_iqa(vvtd, val, 1);
> + break;
> +
> + case DMAR_IEDATA_REG:
> + case DMAR_IEADDR_REG:
> + case DMAR_IEUADDR_REG:
> + vvtd_set_reg(vvtd, offset, val);
> + break;
> +
> + default:
> + break;
> + }
> + }
> + spin_unlock(&vvtd->ie_lock);
> +
> +}
> +
> static int vvtd_write(struct vcpu *v, unsigned long addr,
> unsigned int len, unsigned long val)
> {
> @@ -719,6 +968,17 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
> vvtd_write_fault_regs(vvtd, val, offset, len);
> break;
>
> + case DMAR_IECTL_REG:
> + case DMAR_ICS_REG:
> + case DMAR_IQT_REG:
> + case DMAR_IQA_REG:
> + case DMAR_IQUA_REG:
> + case DMAR_IEDATA_REG:
> + case DMAR_IEADDR_REG:
> + case DMAR_IEUADDR_REG:
> + vvtd_write_invalidation_regs(vvtd, val, offset, len);
> + break;
> +
> default:
> if ( (offset == (fault_offset + DMA_FRCD2_OFFSET)) ||
> (offset == (fault_offset + DMA_FRCD3_OFFSET)) )
> @@ -840,7 +1100,8 @@ static int vvtd_handle_irq_request(const struct domain *d,
> irte.remap.tm);
>
> out:
> - atomic_dec(&vvtd->inflight_intr);
> + if ( !atomic_dec_and_test(&vvtd->inflight_intr) )
> + vvtd_process_iq(vvtd);
> return ret;
> }
>
> @@ -911,6 +1172,7 @@ static int vvtd_create(struct domain *d, struct viommu *viommu)
> vvtd->domain = d;
> register_mmio_handler(d, &vvtd_mmio_ops);
> spin_lock_init(&vvtd->fe_lock);
> + spin_lock_init(&vvtd->ie_lock);
>
> viommu->priv = vvtd;
>
> @@ -930,6 +1192,13 @@ static int vvtd_destroy(struct viommu *viommu)
> sizeof(struct iremap_entry)));
> vvtd->irt_base = NULL;
> }
> + if ( vvtd->inv_queue_base )
> + {
> + uint64_t old = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
> +
> + unmap_guest_pages(vvtd->inv_queue_base, 1 << DMA_IQA_QS(old));
Don't you also need to unmap this page(s) when QIE is disabled?
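i.e. in the global command handler, something along the lines of
(sketch, helper names depending on how the rest of the series does it):

    if ( !(val & DMA_GCMD_QIE) && vvtd->inv_queue_base )
    {
        unmap_guest_pages(vvtd->inv_queue_base,
                          1 << DMA_IQA_QS(vvtd_get_reg_quad(vvtd,
                                                            DMAR_IQA_REG)));
        vvtd->inv_queue_base = NULL;
    }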
Thanks, Roger.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel