
Re: [Xen-devel] [PATCH v2 06/10] x86/SVM: Add AVIC vmexit handlers



On 31/12/2016 05:45, Suravee Suthikulpanit wrote:
> VMEXIT_DO_NOACCEL:
> This oocurs when a guest access to an APIC register that cannot be

"occurs"

> accelerated by AVIC. In this case, Xen tries to emulate register accesses.
>
> This fault is also generated if an EOI isattempted when the highest priority

"is attempted"

> @@ -122,6 +125,8 @@ int svm_avic_dom_init(struct domain *d)
>      clear_domain_page(_mfn(mfn));
>      d->arch.hvm_domain.svm.avic_phy_apic_id_table_mfn = mfn;
>  
> +    spin_lock_init(&d->arch.hvm_domain.svm.avic_ldr_mode_lock);
> +

I think this hunk belongs in the previous patch.

>      return ret;
>   err_out:
>      svm_avic_dom_destroy(d);
> @@ -222,6 +227,338 @@ int svm_avic_init_vmcb(struct vcpu *v)
>  }
>  
>  /*
> + * Note:
> + * This function handles the AVIC_INCOMP_IPI #vmexit when AVIC is enabled.
> + * The hardware generates this fault when an IPI could not be delivered
> + * to all targeted guest virtual processors because at least one guest
> + * virtual processor was not allocated to a physical core at the time.
> + */
> +void svm_avic_vmexit_do_incomp_ipi(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *curr = current;
> +    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
> +    u32 icrh = vmcb->exitinfo1 >> 32;
> +    u32 icrl = vmcb->exitinfo1;
> +    u32 id = vmcb->exitinfo2 >> 32;
> +    u32 index = vmcb->exitinfo2 && 0xFF;
> +
> +    switch ( id )
> +    {
> +    case AVIC_INCMP_IPI_ERR_INVALID_INT_TYPE:
> +        /*
> +         * AVIC hardware handles the delivery of
> +         * IPIs when the specified Message Type is Fixed
> +         * (also known as fixed delivery mode) and
> +         * the Trigger Mode is edge-triggered. The hardware
> +         * also supports self and broadcast delivery modes
> +         * specified via the Destination Shorthand(DSH)
> +         * field of the ICRL. Logical and physical APIC ID
> +         * formats are supported. All other IPI types cause
> +         * a #VMEXIT, which needs to emulated.
> +         */
> +        vlapic_reg_write(curr, APIC_ICR2, icrh);
> +        vlapic_reg_write(curr, APIC_ICR, icrl);
> +        break;

Please have a blank line between the break and the following case.

> +    case AVIC_INCMP_IPI_ERR_TARGET_NOT_RUN:
> +    {
> +        /*
> +         * At this point, we expect that the AVIC HW has already
> +         * set the appropriate IRR bits on the valid target
> +         * vcpus. So, we just need to kick the appropriate vcpu.
> +         */
> +        struct vcpu *curc;
> +        struct domain *curd = curr->domain;

currd is the more common name for this, and I would use a plain v
instead of curc as it isn't curr.
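i.e. with that renaming applied, the hunk would read something like:

    struct vcpu *v;
    struct domain *currd = curr->domain;

    for_each_vcpu ( currd, v )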

> +        uint32_t dest = GET_xAPIC_DEST_FIELD(icrh);
> +        uint32_t short_hand = icrl & APIC_SHORT_MASK;
> +        bool dest_mode = !!(icrl & APIC_DEST_MASK);
> +
> +        for_each_vcpu ( curd, curc )
> +        {
> +            if ( curc != curr &&
> +                 vlapic_match_dest(vcpu_vlapic(curc), vcpu_vlapic(curr),
> +                                   short_hand, dest, dest_mode) )
> +            {
> +                vcpu_kick(curc);
> +                break;
> +            }
> +        }
> +        break;
> +    }
> +    case AVIC_INCMP_IPI_ERR_INV_TARGET:
> +        dprintk(XENLOG_ERR,
> +                "SVM: %s: Invalid IPI target (icr=%#08x:%08x, idx=%u)\n",
> +                __func__, icrh, icrl, index);
> +        break;

Shouldn't this case be emulated, to raise the appropriate APIC error in
the guest's view?
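Roughly what I have in mind (just a sketch - vlapic_error() and the ESR
bit name here are hypothetical placeholders for whatever mechanism the
vlapic code uses to latch ESR bits and fire LVTERR, and the exact error
condition to report wants checking against the APM):

    case AVIC_INCMP_IPI_ERR_INV_TARGET:
        /* Target guest APIC ID doesn't exist: reflect an APIC error. */
        vlapic_error(vcpu_vlapic(curr),
                     APIC_ESR_SEND_ACCEPT_ERR /* hypothetical name */);
        break;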

> +    case AVIC_INCMP_IPI_ERR_INV_BK_PAGE:
> +        dprintk(XENLOG_ERR,
> +                "SVM: %s: Invalid bk page (icr=%#08x:%08x, idx=%u)\n",
> +                __func__, icrh, icrl, index);
> +        break;
> +    default:
> +        dprintk(XENLOG_ERR, "SVM: %s: Unknown IPI interception (%#x)\n",
> +                __func__, id);

These two cases indicate an error in Xen's setup, or in the hardware.  They
should be gprintk()s (so as not to disappear in release builds), and should
cause a domain_crash().
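i.e. something along the lines of (untested):

    case AVIC_INCMP_IPI_ERR_INV_BK_PAGE:
        gprintk(XENLOG_ERR,
                "SVM: %s: Invalid bk page (icr=%#08x:%08x, idx=%u)\n",
                __func__, icrh, icrl, index);
        domain_crash(curr->domain);
        break;

    default:
        gprintk(XENLOG_ERR, "SVM: %s: Unknown IPI interception (%#x)\n",
                __func__, id);
        domain_crash(curr->domain);
        break;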

> +    }
> +}
> +
> +static struct avic_log_apic_id_ent *
> +avic_get_logical_id_entry(const struct vcpu *v, u32 ldr, bool flat)
> +{
> +    unsigned int index;
> +    struct avic_log_apic_id_ent *avic_log_apid_id_table;
> +    const struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
> +    unsigned int dest_id = GET_APIC_LOGICAL_ID(ldr);
> +
> +    if ( !dest_id )
> +        return NULL;
> +
> +    if ( flat )
> +    {
> +        index = ffs(dest_id) - 1;
> +        if ( index > 7 )
> +            return NULL;
> +    }
> +    else
> +    {
> +        unsigned int cluster = (dest_id & 0xf0) >> 4;
> +        int apic = ffs(dest_id & 0x0f) - 1;
> +
> +        if ( (apic < 0) || (apic > 7) || (cluster >= 0xf) )
> +            return NULL;
> +        index = (cluster << 2) + apic;
> +    }
> +
> +    ASSERT(index <= 255);
> +
> +    avic_log_apid_id_table = mfn_to_virt(d->avic_log_apic_id_table_mfn);

As with the physical table, this is buggy above 5TB, and should probably
be a global mapping to start with.
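i.e. map the page once with map_domain_page_global() at domain init and
stash the pointer, rather than relying on mfn_to_virt().  A sketch only -
the avic_log_apic_id_table field is made up for illustration:

    /* At domain init, after allocating the logical APIC ID table page: */
    d->arch.hvm_domain.svm.avic_log_apic_id_table =
        map_domain_page_global(_mfn(mfn));
    if ( !d->arch.hvm_domain.svm.avic_log_apic_id_table )
        goto err_out;

    /* ... and then here, simply: */
    avic_log_apid_id_table = d->avic_log_apic_id_table;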

> +
> +    return &avic_log_apid_id_table[index];
> +}
> +
> +static int avic_ldr_write(struct vcpu *v, u8 g_phy_id, u32 ldr, bool valid)
> +{
> +    struct avic_log_apic_id_ent *entry, new_entry;
> +    u32 *bp = avic_get_bk_page_entry(v, APIC_DFR);
> +
> +    if ( !bp )
> +        return -EINVAL;
> +
> +    entry = avic_get_logical_id_entry(v, ldr, (*bp == APIC_DFR_FLAT));
> +    if (!entry)
> +        return -EINVAL;
> +
> +    new_entry = *entry;
> +    smp_rmb();
> +    new_entry.guest_phy_apic_id = g_phy_id;
> +    new_entry.valid = valid;
> +    *entry = new_entry;
> +    smp_wmb();
> +
> +    return 0;
> +}
> +
> +static int avic_handle_ldr_update(struct vcpu *v)
> +{
> +    int ret = 0;
> +    u32 *ldr = avic_get_bk_page_entry(v, APIC_LDR);
> +    u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID);
> +
> +    if ( !ldr || !*ldr || !apic_id_reg )
> +        return -EINVAL;
> +
> +    ret = avic_ldr_write(v, GET_APIC_PHYSICAL_ID(*apic_id_reg), *ldr, true);
> +    if ( ret && v->arch.hvm_svm.avic_last_ldr )
> +    {
> +        /*
> +         * Note:
> +         * In case of failure to update LDR register,
> +         * we set the guest physical APIC ID to 0,
> +         * and set the entry logical APID ID entry
> +         * to invalid (false).
> +         */
> +        avic_ldr_write(v, 0, v->arch.hvm_svm.avic_last_ldr, false);
> +        v->arch.hvm_svm.avic_last_ldr = 0;
> +    }
> +    else
> +    {
> +        /*
> +         * Note:
> +         * This saves the last valid LDR so that we
> +         * know which entry in the local APIC ID
> +         * to clean up when the LDR is updated.
> +         */
> +        v->arch.hvm_svm.avic_last_ldr = *ldr;
> +    }
> +
> +    return ret;
> +}
> +
> +static int avic_handle_apic_id_update(struct vcpu *v, bool init)
> +{
> +    struct avic_phy_apic_id_ent *old, *new;
> +    struct arch_svm_struct *s = &v->arch.hvm_svm;
> +    u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID);
> +
> +    if ( !apic_id_reg )
> +        return -EINVAL;
> +
> +    old = s->avic_last_phy_id;
> +    ASSERT(old);
> +
> +    new = avic_get_phy_apic_id_ent(v, GET_APIC_PHYSICAL_ID(*apic_id_reg));
> +    if ( !new )
> +        return 0;
> +
> +    /* We need to move physical_id_entry to new offset */
> +    *new = *old;
> +    *((u64 *)old) = 0ULL;
> +    s->avic_last_phy_id = new;
> +
> +    /*
> +     * Update the guest physical APIC ID in the logical
> +     * APIC ID table entry if LDR is already setup.
> +     */
> +    if ( v->arch.hvm_svm.avic_last_ldr )
> +        avic_handle_ldr_update(v);
> +
> +    return 0;
> +}
> +
> +static int avic_handle_dfr_update(struct vcpu *v)
> +{
> +    u32 mod;
> +    struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
> +    u32 *dfr = avic_get_bk_page_entry(v, APIC_DFR);
> +
> +    if ( !dfr )
> +        return -EINVAL;
> +
> +    mod = (*dfr >> 28) & 0xFu;
> +
> +    spin_lock(&d->avic_ldr_mode_lock);
> +    if ( d->avic_ldr_mode != mod )
> +    {
> +        /*
> +         * We assume that all local APICs are using the same type.
> +         * If LDR mode changes, we need to flush the domain AVIC logical
> +         * APIC id table.
> +         */

The logical APIC ID table is per-domain, yet LDR mode is per-vcpu.  How
can these coexist if we flush the table like this?  How would a
multi-vcpu domain actually change its mode without this logic in Xen
corrupting the table?

> +        clear_domain_page(_mfn(d->avic_log_apic_id_table_mfn));
> +        smp_wmb();
> +        d->avic_ldr_mode = mod;
> +    }
> +    spin_unlock(&d->avic_ldr_mode_lock);
> +
> +    if ( v->arch.hvm_svm.avic_last_ldr )
> +        avic_handle_ldr_update(v);
> +
> +    return 0;
> +}
> +
> +static int avic_unaccel_trap_write(struct vcpu *v)
> +{
> +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
> +    u32 offset = vmcb->exitinfo1 & AVIC_UNACCEL_ACCESS_OFFSET_MASK;
> +    u32 *reg = avic_get_bk_page_entry(v, offset);
> +
> +    if ( !reg )
> +        return X86EMUL_UNHANDLEABLE;
> +
> +    switch ( offset )
> +    {
> +    case APIC_ID:
> +        if ( avic_handle_apic_id_update(v, false) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +    case APIC_LDR:
> +        if ( avic_handle_ldr_update(v) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +    case APIC_DFR:
> +        if ( avic_handle_dfr_update(v) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +    default:
> +        break;
> +    }
> +
> +    vlapic_reg_write(v, offset, *reg);
> +
> +    return X86EMUL_OKAY;
> +}
> +
> +/*
> + * Note:
> + * This function handles the AVIC_NOACCEL #vmexit when AVIC is enabled.
> + * The hardware generates this fault when :
> + * -  A guest access to an APIC register that is not accelerated
> + *    by AVIC hardware.
> + * - EOI is attempted when the highest priority in-service interrupt
> + *   is level-triggered.
> + */
> +void svm_avic_vmexit_do_noaccel(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *curr = current;
> +    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
> +    u32 offset = vmcb->exitinfo1 & 0xFF0;
> +    u32 rw = (vmcb->exitinfo1 >> 32) & 0x1;
> +
> +    switch ( offset )
> +    {
> +    case APIC_ID:
> +    case APIC_EOI:
> +    case APIC_RRR:
> +    case APIC_LDR:
> +    case APIC_DFR:
> +    case APIC_SPIV:
> +    case APIC_ESR:
> +    case APIC_ICR:
> +    case APIC_LVTT:
> +    case APIC_LVTTHMR:
> +    case APIC_LVTPC:
> +    case APIC_LVT0:
> +    case APIC_LVT1:
> +    case APIC_LVTERR:
> +    case APIC_TMICT:
> +    case APIC_TDCR:

Please use a bitmap in a similar way to hvm_x2apic_msr_read().  It
generates far more efficient code.
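Roughly what I mean, mirroring the approach in hvm_x2apic_msr_read()
(untested sketch; the avic_trap_regs name is made up):

    static const unsigned long avic_trap_regs[] =
    {
    #define REG(x) (1UL << (APIC_ ## x >> 4))
        REG(ID)    | REG(EOI)   | REG(RRR)    | REG(LDR)    | REG(DFR)     |
        REG(SPIV)  | REG(ESR)   | REG(ICR)    | REG(LVTT)   | REG(LVTTHMR) |
        REG(LVTPC) | REG(LVT0)  | REG(LVT1)   | REG(LVTERR) | REG(TMICT)   |
        REG(TDCR)
    #undef REG
    };

with the big switch then collapsing to a single range check plus bitmap
test:

    if ( offset <= APIC_TDCR && test_bit(offset >> 4, avic_trap_regs) )
        /* trap-style handling, as above */;
    else
        /* fault-style handling, as above */;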

> +        /*
> +         * Handling AVIC Trap (intercept right after the access).
> +         */
> +        if ( !rw )
> +        {
> +            /*
> +             * If a read trap happens, the CPU microcode does not
> +             * implement the spec.

So it is all microcode then :)

> +             */
> +            BUG();

domain_crash() please, and a suitable gprintk().  There is no need to
take down the entire system, as non-AVIC guests should be able to
continue functioning.
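i.e. something like:

    if ( !rw )
    {
        gprintk(XENLOG_ERR,
                "SVM: %s: Unexpected read trap on APIC reg %#x\n",
                __func__, offset);
        domain_crash(curr->domain);
        return;
    }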

> +        }
> +        if ( avic_unaccel_trap_write(curr) != X86EMUL_OKAY )
> +        {
> +            gprintk(XENLOG_ERR, "%s: Failed to handle trap write (%#x)\n",
> +                    __func__, offset);
> +            return;
> +        }
> +        break;
> +    default:
> +        /*
> +         * Handling AVIC Fault (intercept before the access).
> +         */
> +        if ( !rw )
> +        {
> +            u32 *entry = avic_get_bk_page_entry(curr, offset);
> +
> +            if ( !entry )
> +                return;
> +
> +            *entry = vlapic_read_aligned(vcpu_vlapic(curr), offset);
> +        }
> +        hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op,
> +                                 HVM_DELIVER_NO_ERROR_CODE);

What is this doing here?  I don't see any connection.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 

