[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v2 06/10] x86/SVM: Add AVIC vmexit handlers
On 31/12/2016 05:45, Suravee Suthikulpanit wrote: > VMEXIT_DO_NOACCEL: > This oocurs when a guest access to an APIC register that cannot be "occurs" > accelerated by AVIC. In this case, Xen tries to emulate register accesses. > > This fault is also generated if an EOI isattempted when the highest priority "is attempted" > @@ -122,6 +125,8 @@ int svm_avic_dom_init(struct domain *d) > clear_domain_page(_mfn(mfn)); > d->arch.hvm_domain.svm.avic_phy_apic_id_table_mfn = mfn; > > + spin_lock_init(&d->arch.hvm_domain.svm.avic_ldr_mode_lock); > + I think this hunk belongs in the previous patch. > return ret; > err_out: > svm_avic_dom_destroy(d); > @@ -222,6 +227,338 @@ int svm_avic_init_vmcb(struct vcpu *v) > } > > /* > + * Note: > + * This function handles the AVIC_INCOMP_IPI #vmexit when AVIC is enabled. > + * The hardware generates this fault when an IPI could not be delivered > + * to all targeted guest virtual processors because at least one guest > + * virtual processor was not allocated to a physical core at the time. > + */ > +void svm_avic_vmexit_do_incomp_ipi(struct cpu_user_regs *regs) > +{ > + struct vcpu *curr = current; > + struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb; > + u32 icrh = vmcb->exitinfo1 >> 32; > + u32 icrl = vmcb->exitinfo1; > + u32 id = vmcb->exitinfo2 >> 32; > + u32 index = vmcb->exitinfo2 && 0xFF; > + > + switch ( id ) > + { > + case AVIC_INCMP_IPI_ERR_INVALID_INT_TYPE: > + /* > + * AVIC hardware handles the delivery of > + * IPIs when the specified Message Type is Fixed > + * (also known as fixed delivery mode) and > + * the Trigger Mode is edge-triggered. The hardware > + * also supports self and broadcast delivery modes > + * specified via the Destination Shorthand(DSH) > + * field of the ICRL. Logical and physical APIC ID > + * formats are supported. All other IPI types cause > + * a #VMEXIT, which needs to emulated. > + */ > + vlapic_reg_write(curr, APIC_ICR2, icrh); > + vlapic_reg_write(curr, APIC_ICR, icrl); > + break; Please have a newline between break and case. > + case AVIC_INCMP_IPI_ERR_TARGET_NOT_RUN: > + { > + /* > + * At this point, we expect that the AVIC HW has already > + * set the appropriate IRR bits on the valid target > + * vcpus. So, we just need to kick the appropriate vcpu. > + */ > + struct vcpu *curc; > + struct domain *curd = curr->domain; currd is the more common name for this, and I would use a plain v instead of curc as it isn't curr. > + uint32_t dest = GET_xAPIC_DEST_FIELD(icrh); > + uint32_t short_hand = icrl & APIC_SHORT_MASK; > + bool dest_mode = !!(icrl & APIC_DEST_MASK); > + > + for_each_vcpu ( curd, curc ) > + { > + if ( curc != curr && > + vlapic_match_dest(vcpu_vlapic(curc), vcpu_vlapic(curr), > + short_hand, dest, dest_mode) ) > + { > + vcpu_kick(curc); > + break; > + } > + } > + break; > + } > + case AVIC_INCMP_IPI_ERR_INV_TARGET: > + dprintk(XENLOG_ERR, > + "SVM: %s: Invalid IPI target (icr=%#08x:%08x, idx=%u)\n", > + __func__, icrh, icrl, index); > + break; Shouldn't this case be emulated to raise appropriate APIC errors in the guests view? > + case AVIC_INCMP_IPI_ERR_INV_BK_PAGE: > + dprintk(XENLOG_ERR, > + "SVM: %s: Invalid bk page (icr=%#08x:%08x, idx=%u)\n", > + __func__, icrh, icrl, index); > + break; > + default: > + dprintk(XENLOG_ERR, "SVM: %s: Unknown IPI interception (%#x)\n", > + __func__, id); These two cases are an error in Xen's setup, or the hardware. They should be gprintk()s (so as not to disappear in release builds), and cause a domain_crash(). > + } > +} > + > +static struct avic_log_apic_id_ent * > +avic_get_logical_id_entry(const struct vcpu *v, u32 ldr, bool flat) > +{ > + unsigned int index; > + struct avic_log_apic_id_ent *avic_log_apid_id_table; > + const struct svm_domain *d = &v->domain->arch.hvm_domain.svm; > + unsigned int dest_id = GET_APIC_LOGICAL_ID(ldr); > + > + if ( !dest_id ) > + return NULL; > + > + if ( flat ) > + { > + index = ffs(dest_id) - 1; > + if ( index > 7 ) > + return NULL; > + } > + else > + { > + unsigned int cluster = (dest_id & 0xf0) >> 4; > + int apic = ffs(dest_id & 0x0f) - 1; > + > + if ( (apic < 0) || (apic > 7) || (cluster >= 0xf) ) > + return NULL; > + index = (cluster << 2) + apic; > + } > + > + ASSERT(index <= 255); > + > + avic_log_apid_id_table = mfn_to_virt(d->avic_log_apic_id_table_mfn); As with the phsyical table, this is buggy above 5TB, and should probably be a global mapping to start with. > + > + return &avic_log_apid_id_table[index]; > +} > + > +static int avic_ldr_write(struct vcpu *v, u8 g_phy_id, u32 ldr, bool valid) > +{ > + struct avic_log_apic_id_ent *entry, new_entry; > + u32 *bp = avic_get_bk_page_entry(v, APIC_DFR); > + > + if ( !bp ) > + return -EINVAL; > + > + entry = avic_get_logical_id_entry(v, ldr, (*bp == APIC_DFR_FLAT)); > + if (!entry) > + return -EINVAL; > + > + new_entry = *entry; > + smp_rmb(); > + new_entry.guest_phy_apic_id = g_phy_id; > + new_entry.valid = valid; > + *entry = new_entry; > + smp_wmb(); > + > + return 0; > +} > + > +static int avic_handle_ldr_update(struct vcpu *v) > +{ > + int ret = 0; > + u32 *ldr = avic_get_bk_page_entry(v, APIC_LDR); > + u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID); > + > + if ( !ldr || !*ldr || !apic_id_reg ) > + return -EINVAL; > + > + ret = avic_ldr_write(v, GET_APIC_PHYSICAL_ID(*apic_id_reg), *ldr, true); > + if ( ret && v->arch.hvm_svm.avic_last_ldr ) > + { > + /* > + * Note: > + * In case of failure to update LDR register, > + * we set the guest physical APIC ID to 0, > + * and set the entry logical APID ID entry > + * to invalid (false). > + */ > + avic_ldr_write(v, 0, v->arch.hvm_svm.avic_last_ldr, false); > + v->arch.hvm_svm.avic_last_ldr = 0; > + } > + else > + { > + /* > + * Note: > + * This saves the last valid LDR so that we > + * know which entry in the local APIC ID > + * to clean up when the LDR is updated. > + */ > + v->arch.hvm_svm.avic_last_ldr = *ldr; > + } > + > + return ret; > +} > + > +static int avic_handle_apic_id_update(struct vcpu *v, bool init) > +{ > + struct avic_phy_apic_id_ent *old, *new; > + struct arch_svm_struct *s = &v->arch.hvm_svm; > + u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID); > + > + if ( !apic_id_reg ) > + return -EINVAL; > + > + old = s->avic_last_phy_id; > + ASSERT(old); > + > + new = avic_get_phy_apic_id_ent(v, GET_APIC_PHYSICAL_ID(*apic_id_reg)); > + if ( !new ) > + return 0; > + > + /* We need to move physical_id_entry to new offset */ > + *new = *old; > + *((u64 *)old) = 0ULL; > + s->avic_last_phy_id = new; > + > + /* > + * Update the guest physical APIC ID in the logical > + * APIC ID table entry if LDR is already setup. > + */ > + if ( v->arch.hvm_svm.avic_last_ldr ) > + avic_handle_ldr_update(v); > + > + return 0; > +} > + > +static int avic_handle_dfr_update(struct vcpu *v) > +{ > + u32 mod; > + struct svm_domain *d = &v->domain->arch.hvm_domain.svm; > + u32 *dfr = avic_get_bk_page_entry(v, APIC_DFR); > + > + if ( !dfr ) > + return -EINVAL; > + > + mod = (*dfr >> 28) & 0xFu; > + > + spin_lock(&d->avic_ldr_mode_lock); > + if ( d->avic_ldr_mode != mod ) > + { > + /* > + * We assume that all local APICs are using the same type. > + * If LDR mode changes, we need to flush the domain AVIC logical > + * APIC id table. > + */ The logical APIC ID table is per-domain, yet LDR mode is per-vcpu. How can these coexist if we flush the table like this? How would a multi-vcpu domain actually change its mode without this logic in Xen corrupting the table? > + clear_domain_page(_mfn(d->avic_log_apic_id_table_mfn)); > + smp_wmb(); > + d->avic_ldr_mode = mod; > + } > + spin_unlock(&d->avic_ldr_mode_lock); > + > + if ( v->arch.hvm_svm.avic_last_ldr ) > + avic_handle_ldr_update(v); > + > + return 0; > +} > + > +static int avic_unaccel_trap_write(struct vcpu *v) > +{ > + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; > + u32 offset = vmcb->exitinfo1 & AVIC_UNACCEL_ACCESS_OFFSET_MASK; > + u32 *reg = avic_get_bk_page_entry(v, offset); > + > + if ( !reg ) > + return X86EMUL_UNHANDLEABLE; > + > + switch ( offset ) > + { > + case APIC_ID: > + if ( avic_handle_apic_id_update(v, false) ) > + return X86EMUL_UNHANDLEABLE; > + break; > + case APIC_LDR: > + if ( avic_handle_ldr_update(v) ) > + return X86EMUL_UNHANDLEABLE; > + break; > + case APIC_DFR: > + if ( avic_handle_dfr_update(v) ) > + return X86EMUL_UNHANDLEABLE; > + break; > + default: > + break; > + } > + > + vlapic_reg_write(v, offset, *reg); > + > + return X86EMUL_OKAY; > +} > + > +/* > + * Note: > + * This function handles the AVIC_NOACCEL #vmexit when AVIC is enabled. > + * The hardware generates this fault when : > + * - A guest access to an APIC register that is not accelerated > + * by AVIC hardware. > + * - EOI is attempted when the highest priority in-service interrupt > + * is level-triggered. > + */ > +void svm_avic_vmexit_do_noaccel(struct cpu_user_regs *regs) > +{ > + struct vcpu *curr = current; > + struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb; > + u32 offset = vmcb->exitinfo1 & 0xFF0; > + u32 rw = (vmcb->exitinfo1 >> 32) & 0x1; > + > + switch ( offset ) > + { > + case APIC_ID: > + case APIC_EOI: > + case APIC_RRR: > + case APIC_LDR: > + case APIC_DFR: > + case APIC_SPIV: > + case APIC_ESR: > + case APIC_ICR: > + case APIC_LVTT: > + case APIC_LVTTHMR: > + case APIC_LVTPC: > + case APIC_LVT0: > + case APIC_LVT1: > + case APIC_LVTERR: > + case APIC_TMICT: > + case APIC_TDCR: Please use a bitmap in a similar way to hvm_x2apic_msr_read(). It generates far more efficient code. > + /* > + * Handling AVIC Trap (intercept right after the access). > + */ > + if ( !rw ) > + { > + /* > + * If a read trap happens, the CPU microcode does not > + * implement the spec. So it is all microcode then :) > + */ > + BUG(); domain_crash() please, and a suitable gprintk(). There is no need to take down the entire system, as non-avic guests should be able to continue functioning. > + } > + if ( avic_unaccel_trap_write(curr) != X86EMUL_OKAY ) > + { > + gprintk(XENLOG_ERR, "%s: Failed to handle trap write (%#x)\n", > + __func__, offset); > + return; > + } > + break; > + default: > + /* > + * Handling AVIC Fault (intercept before the access). > + */ > + if ( !rw ) > + { > + u32 *entry = avic_get_bk_page_entry(curr, offset); > + > + if ( !entry ) > + return; > + > + *entry = vlapic_read_aligned(vcpu_vlapic(curr), offset); > + } > + hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op, > + HVM_DELIVER_NO_ERROR_CODE); What is this doing here? I don't see any connection. ~Andrew _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |