|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v2 06/10] x86/SVM: Add AVIC vmexit handlers
On 31/12/2016 05:45, Suravee Suthikulpanit wrote:
> VMEXIT_DO_NOACCEL:
> This oocurs when a guest access to an APIC register that cannot be
"occurs"
> accelerated by AVIC. In this case, Xen tries to emulate register accesses.
>
> This fault is also generated if an EOI isattempted when the highest priority
"is attempted"
> @@ -122,6 +125,8 @@ int svm_avic_dom_init(struct domain *d)
> clear_domain_page(_mfn(mfn));
> d->arch.hvm_domain.svm.avic_phy_apic_id_table_mfn = mfn;
>
> + spin_lock_init(&d->arch.hvm_domain.svm.avic_ldr_mode_lock);
> +
I think this hunk belongs in the previous patch.
> return ret;
> err_out:
> svm_avic_dom_destroy(d);
> @@ -222,6 +227,338 @@ int svm_avic_init_vmcb(struct vcpu *v)
> }
>
> /*
> + * Note:
> + * This function handles the AVIC_INCOMP_IPI #vmexit when AVIC is enabled.
> + * The hardware generates this fault when an IPI could not be delivered
> + * to all targeted guest virtual processors because at least one guest
> + * virtual processor was not allocated to a physical core at the time.
> + */
> +void svm_avic_vmexit_do_incomp_ipi(struct cpu_user_regs *regs)
> +{
> + struct vcpu *curr = current;
> + struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
> + u32 icrh = vmcb->exitinfo1 >> 32;
> + u32 icrl = vmcb->exitinfo1;
> + u32 id = vmcb->exitinfo2 >> 32;
> + u32 index = vmcb->exitinfo2 && 0xFF;
> +
> + switch ( id )
> + {
> + case AVIC_INCMP_IPI_ERR_INVALID_INT_TYPE:
> + /*
> + * AVIC hardware handles the delivery of
> + * IPIs when the specified Message Type is Fixed
> + * (also known as fixed delivery mode) and
> + * the Trigger Mode is edge-triggered. The hardware
> + * also supports self and broadcast delivery modes
> + * specified via the Destination Shorthand(DSH)
> + * field of the ICRL. Logical and physical APIC ID
> + * formats are supported. All other IPI types cause
> + * a #VMEXIT, which needs to emulated.
> + */
> + vlapic_reg_write(curr, APIC_ICR2, icrh);
> + vlapic_reg_write(curr, APIC_ICR, icrl);
> + break;
Please have a newline between break and case.
> + case AVIC_INCMP_IPI_ERR_TARGET_NOT_RUN:
> + {
> + /*
> + * At this point, we expect that the AVIC HW has already
> + * set the appropriate IRR bits on the valid target
> + * vcpus. So, we just need to kick the appropriate vcpu.
> + */
> + struct vcpu *curc;
> + struct domain *curd = curr->domain;
currd is the more common name for this, and I would use a plain v
instead of curc as it isn't curr.
> + uint32_t dest = GET_xAPIC_DEST_FIELD(icrh);
> + uint32_t short_hand = icrl & APIC_SHORT_MASK;
> + bool dest_mode = !!(icrl & APIC_DEST_MASK);
> +
> + for_each_vcpu ( curd, curc )
> + {
> + if ( curc != curr &&
> + vlapic_match_dest(vcpu_vlapic(curc), vcpu_vlapic(curr),
> + short_hand, dest, dest_mode) )
> + {
> + vcpu_kick(curc);
> + break;
> + }
> + }
> + break;
> + }
> + case AVIC_INCMP_IPI_ERR_INV_TARGET:
> + dprintk(XENLOG_ERR,
> + "SVM: %s: Invalid IPI target (icr=%#08x:%08x, idx=%u)\n",
> + __func__, icrh, icrl, index);
> + break;
Shouldn't this case be emulated to raise appropriate APIC errors in the
guests view?
> + case AVIC_INCMP_IPI_ERR_INV_BK_PAGE:
> + dprintk(XENLOG_ERR,
> + "SVM: %s: Invalid bk page (icr=%#08x:%08x, idx=%u)\n",
> + __func__, icrh, icrl, index);
> + break;
> + default:
> + dprintk(XENLOG_ERR, "SVM: %s: Unknown IPI interception (%#x)\n",
> + __func__, id);
These two cases are an error in Xen's setup, or the hardware. They
should be gprintk()s (so as not to disappear in release builds), and
cause a domain_crash().
> + }
> +}
> +
> +static struct avic_log_apic_id_ent *
> +avic_get_logical_id_entry(const struct vcpu *v, u32 ldr, bool flat)
> +{
> + unsigned int index;
> + struct avic_log_apic_id_ent *avic_log_apid_id_table;
> + const struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
> + unsigned int dest_id = GET_APIC_LOGICAL_ID(ldr);
> +
> + if ( !dest_id )
> + return NULL;
> +
> + if ( flat )
> + {
> + index = ffs(dest_id) - 1;
> + if ( index > 7 )
> + return NULL;
> + }
> + else
> + {
> + unsigned int cluster = (dest_id & 0xf0) >> 4;
> + int apic = ffs(dest_id & 0x0f) - 1;
> +
> + if ( (apic < 0) || (apic > 7) || (cluster >= 0xf) )
> + return NULL;
> + index = (cluster << 2) + apic;
> + }
> +
> + ASSERT(index <= 255);
> +
> + avic_log_apid_id_table = mfn_to_virt(d->avic_log_apic_id_table_mfn);
As with the phsyical table, this is buggy above 5TB, and should probably
be a global mapping to start with.
> +
> + return &avic_log_apid_id_table[index];
> +}
> +
> +static int avic_ldr_write(struct vcpu *v, u8 g_phy_id, u32 ldr, bool valid)
> +{
> + struct avic_log_apic_id_ent *entry, new_entry;
> + u32 *bp = avic_get_bk_page_entry(v, APIC_DFR);
> +
> + if ( !bp )
> + return -EINVAL;
> +
> + entry = avic_get_logical_id_entry(v, ldr, (*bp == APIC_DFR_FLAT));
> + if (!entry)
> + return -EINVAL;
> +
> + new_entry = *entry;
> + smp_rmb();
> + new_entry.guest_phy_apic_id = g_phy_id;
> + new_entry.valid = valid;
> + *entry = new_entry;
> + smp_wmb();
> +
> + return 0;
> +}
> +
> +static int avic_handle_ldr_update(struct vcpu *v)
> +{
> + int ret = 0;
> + u32 *ldr = avic_get_bk_page_entry(v, APIC_LDR);
> + u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID);
> +
> + if ( !ldr || !*ldr || !apic_id_reg )
> + return -EINVAL;
> +
> + ret = avic_ldr_write(v, GET_APIC_PHYSICAL_ID(*apic_id_reg), *ldr, true);
> + if ( ret && v->arch.hvm_svm.avic_last_ldr )
> + {
> + /*
> + * Note:
> + * In case of failure to update LDR register,
> + * we set the guest physical APIC ID to 0,
> + * and set the entry logical APID ID entry
> + * to invalid (false).
> + */
> + avic_ldr_write(v, 0, v->arch.hvm_svm.avic_last_ldr, false);
> + v->arch.hvm_svm.avic_last_ldr = 0;
> + }
> + else
> + {
> + /*
> + * Note:
> + * This saves the last valid LDR so that we
> + * know which entry in the local APIC ID
> + * to clean up when the LDR is updated.
> + */
> + v->arch.hvm_svm.avic_last_ldr = *ldr;
> + }
> +
> + return ret;
> +}
> +
> +static int avic_handle_apic_id_update(struct vcpu *v, bool init)
> +{
> + struct avic_phy_apic_id_ent *old, *new;
> + struct arch_svm_struct *s = &v->arch.hvm_svm;
> + u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID);
> +
> + if ( !apic_id_reg )
> + return -EINVAL;
> +
> + old = s->avic_last_phy_id;
> + ASSERT(old);
> +
> + new = avic_get_phy_apic_id_ent(v, GET_APIC_PHYSICAL_ID(*apic_id_reg));
> + if ( !new )
> + return 0;
> +
> + /* We need to move physical_id_entry to new offset */
> + *new = *old;
> + *((u64 *)old) = 0ULL;
> + s->avic_last_phy_id = new;
> +
> + /*
> + * Update the guest physical APIC ID in the logical
> + * APIC ID table entry if LDR is already setup.
> + */
> + if ( v->arch.hvm_svm.avic_last_ldr )
> + avic_handle_ldr_update(v);
> +
> + return 0;
> +}
> +
> +static int avic_handle_dfr_update(struct vcpu *v)
> +{
> + u32 mod;
> + struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
> + u32 *dfr = avic_get_bk_page_entry(v, APIC_DFR);
> +
> + if ( !dfr )
> + return -EINVAL;
> +
> + mod = (*dfr >> 28) & 0xFu;
> +
> + spin_lock(&d->avic_ldr_mode_lock);
> + if ( d->avic_ldr_mode != mod )
> + {
> + /*
> + * We assume that all local APICs are using the same type.
> + * If LDR mode changes, we need to flush the domain AVIC logical
> + * APIC id table.
> + */
The logical APIC ID table is per-domain, yet LDR mode is per-vcpu. How
can these coexist if we flush the table like this? How would a
multi-vcpu domain actually change its mode without this logic in Xen
corrupting the table?
> + clear_domain_page(_mfn(d->avic_log_apic_id_table_mfn));
> + smp_wmb();
> + d->avic_ldr_mode = mod;
> + }
> + spin_unlock(&d->avic_ldr_mode_lock);
> +
> + if ( v->arch.hvm_svm.avic_last_ldr )
> + avic_handle_ldr_update(v);
> +
> + return 0;
> +}
> +
> +static int avic_unaccel_trap_write(struct vcpu *v)
> +{
> + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
> + u32 offset = vmcb->exitinfo1 & AVIC_UNACCEL_ACCESS_OFFSET_MASK;
> + u32 *reg = avic_get_bk_page_entry(v, offset);
> +
> + if ( !reg )
> + return X86EMUL_UNHANDLEABLE;
> +
> + switch ( offset )
> + {
> + case APIC_ID:
> + if ( avic_handle_apic_id_update(v, false) )
> + return X86EMUL_UNHANDLEABLE;
> + break;
> + case APIC_LDR:
> + if ( avic_handle_ldr_update(v) )
> + return X86EMUL_UNHANDLEABLE;
> + break;
> + case APIC_DFR:
> + if ( avic_handle_dfr_update(v) )
> + return X86EMUL_UNHANDLEABLE;
> + break;
> + default:
> + break;
> + }
> +
> + vlapic_reg_write(v, offset, *reg);
> +
> + return X86EMUL_OKAY;
> +}
> +
> +/*
> + * Note:
> + * This function handles the AVIC_NOACCEL #vmexit when AVIC is enabled.
> + * The hardware generates this fault when :
> + * - A guest access to an APIC register that is not accelerated
> + * by AVIC hardware.
> + * - EOI is attempted when the highest priority in-service interrupt
> + * is level-triggered.
> + */
> +void svm_avic_vmexit_do_noaccel(struct cpu_user_regs *regs)
> +{
> + struct vcpu *curr = current;
> + struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
> + u32 offset = vmcb->exitinfo1 & 0xFF0;
> + u32 rw = (vmcb->exitinfo1 >> 32) & 0x1;
> +
> + switch ( offset )
> + {
> + case APIC_ID:
> + case APIC_EOI:
> + case APIC_RRR:
> + case APIC_LDR:
> + case APIC_DFR:
> + case APIC_SPIV:
> + case APIC_ESR:
> + case APIC_ICR:
> + case APIC_LVTT:
> + case APIC_LVTTHMR:
> + case APIC_LVTPC:
> + case APIC_LVT0:
> + case APIC_LVT1:
> + case APIC_LVTERR:
> + case APIC_TMICT:
> + case APIC_TDCR:
Please use a bitmap in a similar way to hvm_x2apic_msr_read(). It
generates far more efficient code.
> + /*
> + * Handling AVIC Trap (intercept right after the access).
> + */
> + if ( !rw )
> + {
> + /*
> + * If a read trap happens, the CPU microcode does not
> + * implement the spec.
So it is all microcode then :)
> + */
> + BUG();
domain_crash() please, and a suitable gprintk(). There is no need to
take down the entire system, as non-avic guests should be able to
continue functioning.
> + }
> + if ( avic_unaccel_trap_write(curr) != X86EMUL_OKAY )
> + {
> + gprintk(XENLOG_ERR, "%s: Failed to handle trap write (%#x)\n",
> + __func__, offset);
> + return;
> + }
> + break;
> + default:
> + /*
> + * Handling AVIC Fault (intercept before the access).
> + */
> + if ( !rw )
> + {
> + u32 *entry = avic_get_bk_page_entry(curr, offset);
> +
> + if ( !entry )
> + return;
> +
> + *entry = vlapic_read_aligned(vcpu_vlapic(curr), offset);
> + }
> + hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op,
> + HVM_DELIVER_NO_ERROR_CODE);
What is this doing here? I don't see any connection.
~Andrew
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |