
Re: [Xen-devel] [RFC PATCH 24/49] ARM: new VGIC: Add IRQ sync/flush framework



Hi,

On 13/02/18 12:41, Julien Grall wrote:
> Hi Andre,
> 
> On 09/02/18 14:39, Andre Przywara wrote:
>> Implement the framework for syncing IRQs between our emulation and the
>> list registers, which represent the guest's view of IRQs.
>> This is done in kvm_vgic_flush_hwstate and kvm_vgic_sync_hwstate, which
> 
> You probably want to update the names here.

Sure.

>> gets called on guest entry and exit.
>> The code talking to the actual GICv2/v3 hardware is added in the
>> following patches.
>>
>> This is based on Linux commit 0919e84c0fc1, written by Marc Zyngier.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxxxxx>
>> ---
>>   xen/arch/arm/vgic/vgic.c | 246 +++++++++++++++++++++++++++++++++++++++++++++++
>>   xen/arch/arm/vgic/vgic.h |   2 +
>>   2 files changed, 248 insertions(+)
>>
>> diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c
>> index a4efd1fd03..a1f77130d4 100644
>> --- a/xen/arch/arm/vgic/vgic.c
>> +++ b/xen/arch/arm/vgic/vgic.c
>> @@ -380,6 +380,252 @@ int vgic_inject_irq(struct domain *d, struct vcpu *vcpu, unsigned int intid,
>>       return 0;
>>   }
>>   +/**
>> + * vgic_prune_ap_list - Remove non-relevant interrupts from the list
>> + *
>> + * @vcpu: The VCPU pointer
>> + *
>> + * Go over the list of "interesting" interrupts, and prune those that we
>> + * won't have to consider in the near future.
>> + */
>> +static void vgic_prune_ap_list(struct vcpu *vcpu)
>> +{
>> +    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>> +    struct vgic_irq *irq, *tmp;
>> +    unsigned long flags;
>> +
>> +retry:
>> +    spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
>> +
>> +    list_for_each_entry_safe( irq, tmp, &vgic_cpu->ap_list_head, ap_list )
> 
> See my comment on patch #22; this is where I am worried about going
> through the list every time we enter the hypervisor from the guest.

I am not sure we can avoid this here, as this function is crucial to the
VGIC state machine.
We might later look into whether we can avoid iterating through the whole
list, or whether we can short-cut some interrupts somehow, but I would be
careful about tinkering with this function too much.
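For context, the "oracle" deciding whether an entry can be pruned looks
roughly like this in the KVM code this series is based on (adapted to Xen
types here; a sketch for illustration, not part of this patch):

static struct vcpu *vgic_target_oracle(struct vgic_irq *irq)
{
    ASSERT(spin_is_locked(&irq->irq_lock));

    /* An active interrupt must stay on the VCPU it is active on. */
    if ( irq->active )
        return irq->vcpu ? irq->vcpu : irq->target_vcpu;

    /* An enabled, pending interrupt is routed to its target VCPU. */
    if ( irq->enabled && irq_is_pending(irq) )
        return irq->target_vcpu;

    /* Otherwise the interrupt is "free" and can leave the ap_list. */
    return NULL;
}

An IRQ leaves the ap_list exactly when this returns NULL, which is why
the prune has to revisit every entry whose state might have changed.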

>> +    {
>> +        struct vcpu *target_vcpu, *vcpuA, *vcpuB;
>> +
>> +        spin_lock(&irq->irq_lock);
>> +
>> +        BUG_ON(vcpu != irq->vcpu);
>> +
>> +        target_vcpu = vgic_target_oracle(irq);
>> +
>> +        if ( !target_vcpu )
>> +        {
>> +            /*
>> +             * We don't need to process this interrupt any
>> +             * further, move it off the list.
>> +             */
>> +            list_del(&irq->ap_list);
>> +            irq->vcpu = NULL;
>> +            spin_unlock(&irq->irq_lock);
>> +
>> +            /*
>> +             * This vgic_put_irq call matches the
>> +             * vgic_get_irq_kref in vgic_queue_irq_unlock,
>> +             * where we added the LPI to the ap_list. As
>> +             * we remove the irq from the list, we also
>> +             * drop the refcount.
>> +             */
>> +            vgic_put_irq(vcpu->domain, irq);
>> +            continue;
>> +        }
>> +
>> +        if ( target_vcpu == vcpu )
>> +        {
>> +            /* We're on the right CPU */
>> +            spin_unlock(&irq->irq_lock);
>> +            continue;
>> +        }
>> +
>> +        /* This interrupt looks like it has to be migrated. */
>> +
>> +        spin_unlock(&irq->irq_lock);
>> +        spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
>> +
>> +        /*
>> +         * Ensure locking order by always locking the smallest
>> +         * ID first.
>> +         */
>> +        if ( vcpu->vcpu_id < target_vcpu->vcpu_id )
>> +        {
>> +            vcpuA = vcpu;
>> +            vcpuB = target_vcpu;
>> +        }
>> +        else
>> +        {
>> +            vcpuA = target_vcpu;
>> +            vcpuB = vcpu;
>> +        }
>> +
>> +        spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
>> +        spin_lock(&vcpuB->arch.vgic_cpu.ap_list_lock);
>> +        spin_lock(&irq->irq_lock);
>> +
>> +        /*
>> +         * If the affinity has been preserved, move the
>> +         * interrupt around. Otherwise, it means things have
>> +         * changed while the interrupt was unlocked, and we
>> +         * need to replay this.
>> +         *
>> +         * In all cases, we cannot trust the list not to have
>> +         * changed, so we restart from the beginning.
>> +         */
>> +        if ( target_vcpu == vgic_target_oracle(irq) )
>> +        {
>> +            struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
>> +
>> +            list_del(&irq->ap_list);
>> +            irq->vcpu = target_vcpu;
>> +            list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
>> +        }
>> +
>> +        spin_unlock(&irq->irq_lock);
>> +        spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
>> +        spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
>> +        goto retry;
>> +    }
>> +
>> +    spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
>> +}
>> +
>> +static inline void vgic_fold_lr_state(struct vcpu *vcpu)
>> +{
>> +}
>> +
>> +/* Requires the irq_lock to be held. */
>> +static inline void vgic_populate_lr(struct vcpu *vcpu,
>> +                                    struct vgic_irq *irq, int lr)
>> +{
>> +    ASSERT(spin_is_locked(&irq->irq_lock));
>> +}
>> +
>> +static inline void vgic_clear_lr(struct vcpu *vcpu, int lr)
>> +{
>> +}
>> +
>> +static inline void vgic_set_underflow(struct vcpu *vcpu)
>> +{
>> +}
>> +
>> +/* Requires the ap_list_lock to be held. */
>> +static int compute_ap_list_depth(struct vcpu *vcpu)
>> +{
>> +    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>> +    struct vgic_irq *irq;
>> +    int count = 0;
>> +
>> +    ASSERT(spin_is_locked(&vgic_cpu->ap_list_lock));
>> +
>> +    list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list)
> 
> Here another example.

This can indeed be short-cut. The only place we call this function is
where we compare its result against the number of LRs below.
So we don't need to know the actual number of elements, but could bail
out once we have reached the number of LRs.
This function would then return a bool, comparing internally against
the number of LRs, as sketched below.
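
Something along these lines, where vgic_ap_list_overflows() is a made-up
name just to illustrate the idea (a sketch, not tested):

/* Requires the ap_list_lock to be held. */
static bool vgic_ap_list_overflows(struct vcpu *vcpu)
{
    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
    struct vgic_irq *irq;
    unsigned int nr_lrs = gic_get_nr_lrs();
    unsigned int count = 0;

    ASSERT(spin_is_locked(&vgic_cpu->ap_list_lock));

    list_for_each_entry( irq, &vgic_cpu->ap_list_head, ap_list )
    {
        spin_lock(&irq->irq_lock);
        /* GICv2 SGIs can take up one LR per (pending) source CPU. */
        if ( vgic_irq_is_sgi(irq->intid) && irq->source )
            count += hweight8(irq->source);
        else
            count++;
        spin_unlock(&irq->irq_lock);

        /* Bail out of the walk as soon as we know the LRs won't suffice. */
        if ( count > nr_lrs )
            return true;
    }

    return false;
}

The caller in vgic_flush_lr_state() would then simply read:

    if ( vgic_ap_list_overflows(vcpu) )
        vgic_sort_ap_list(vcpu);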

>> +    {
>> +        spin_lock(&irq->irq_lock);
>> +        /* GICv2 SGIs can count for more than one... */
>> +        if ( vgic_irq_is_sgi(irq->intid) && irq->source )
>> +            count += hweight8(irq->source);
>> +        else
>> +            count++;
>> +        spin_unlock(&irq->irq_lock);
>> +    }
>> +    return count;
>> +}
>> +
>> +/* Requires the VCPU's ap_list_lock to be held. */
>> +static void vgic_flush_lr_state(struct vcpu *vcpu)
>> +{
>> +    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>> +    struct vgic_irq *irq;
>> +    int count = 0;
>> +
>> +    ASSERT(spin_is_locked(&vgic_cpu->ap_list_lock));
>> +
>> +    if ( compute_ap_list_depth(vcpu) > gic_get_nr_lrs() )
>> +        vgic_sort_ap_list(vcpu);
>> +
>> +    list_for_each_entry( irq, &vgic_cpu->ap_list_head, ap_list )
>> +    {
>> +        spin_lock(&irq->irq_lock);
>> +
>> +        if ( unlikely(vgic_target_oracle(irq) != vcpu) )
>> +            goto next;
>> +
>> +        /*
>> +         * If we get an SGI with multiple sources, try to get
>> +         * them in all at once.
>> +         */
>> +        do
>> +        {
>> +            vgic_populate_lr(vcpu, irq, count++);
>> +        } while ( irq->source && count < gic_get_nr_lrs() );
>> +
>> +next:
>> +        spin_unlock(&irq->irq_lock);
>> +
>> +        if ( count == gic_get_nr_lrs() )
>> +        {
>> +            if ( !list_is_last(&irq->ap_list, &vgic_cpu->ap_list_head) )
>> +                vgic_set_underflow(vcpu);
>> +            break;
>> +        }
>> +    }
>> +
>> +    vcpu->arch.vgic_cpu.used_lrs = count;
>> +
>> +    /* Nuke remaining LRs */
>> +    for ( ; count < gic_get_nr_lrs(); count++)
>> +        vgic_clear_lr(vcpu, count);
>> +}
>> +
>> +/*
>> + * gic_clear_lrs() - Update the VGIC state from hardware after a guest's run.
>> + * @vcpu: the VCPU.
>> + *
>> + * Sync back the hardware VGIC state after the guest has run, into our
>> + * VGIC emulation structures. It reads the LRs and updates the respective
>> + * struct vgic_irq, taking level/edge into account.
>> + * This is the high level function which takes care of the conditions,
>> + * and also bails out early if there were no interrupts queued.
>> + * Was: kvm_vgic_sync_hwstate()
>> + */
>> +void gic_clear_lrs(struct vcpu *vcpu)
> 
> I think I would prefer if we stick with the KVM name.

Yes, please! ;-)
I have always found those names confusing, especially gic_inject(). To be
honest, I never know which is which in the KVM case either (sync vs.
flush), so I was wondering if we could call both the entry and the exit
handler "sync", but encode the direction in the name, like:
vgic_sync_from_lrs() and vgic_sync_to_lrs().
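
Purely to illustrate that naming (same bodies as in this patch, just
renamed; a sketch, not a final proposal):

/* On guest exit: read the LRs back into our emulated struct vgic_irq state. */
void vgic_sync_from_lrs(struct vcpu *vcpu)
{
    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

    /* An empty ap_list_head implies used_lrs == 0 */
    if ( list_empty(&vgic_cpu->ap_list_head) )
        return;

    if ( vgic_cpu->used_lrs )
        vgic_fold_lr_state(vcpu);
    vgic_prune_ap_list(vcpu);
}

/* On guest entry: write the emulated state into the LRs. */
void vgic_sync_to_lrs(void)
{
    if ( list_empty(&current->arch.vgic_cpu.ap_list_head) )
        return;

    ASSERT(!local_irq_is_enabled());

    spin_lock(&current->arch.vgic_cpu.ap_list_lock);
    vgic_flush_lr_state(current);
    spin_unlock(&current->arch.vgic_cpu.ap_list_lock);
}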

Cheers,
Andre.

>> +{
>> +    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>> +
>> +    /* An empty ap_list_head implies used_lrs == 0 */
>> +    if ( list_empty(&vcpu->arch.vgic_cpu.ap_list_head) )
>> +        return;
>> +
>> +    if ( vgic_cpu->used_lrs )
>> +        vgic_fold_lr_state(vcpu);
>> +    vgic_prune_ap_list(vcpu);
>> +}
>> +
>> +/*
>> + * gic_inject() - flush the emulation state into the hardware on guest entry
>> + *
>> + * Before we enter a guest, we have to translate the virtual GIC state of a
>> + * VCPU into the GIC virtualization hardware registers, namely the LRs.
>> + * This is the high level function which takes care of the conditions
>> + * and the locking, and also bails out early if there are no interrupts
>> + * queued.
>> + * Was: kvm_vgic_flush_hwstate()
> 
> Ditto.
> 
>> + */
>> +void gic_inject(void)
>> +{
>> +    /*
>> +     * If there are no virtual interrupts active or pending for this
>> +     * VCPU, then there is no work to do and we can bail out without
>> +     * taking any lock.  There is a potential race with someone injecting
>> +     * interrupts to the VCPU, but it is a benign race as the VCPU will
>> +     * either observe the new interrupt before or after doing this check,
>> +     * and introducing an additional synchronization mechanism doesn't
>> +     * change this.
>> +     */
>> +    if ( list_empty(&current->arch.vgic_cpu.ap_list_head) )
>> +        return;
>> +
>> +    ASSERT(!local_irq_is_enabled());
>> +
>> +    spin_lock(&current->arch.vgic_cpu.ap_list_lock);
>> +    vgic_flush_lr_state(current);
>> +    spin_unlock(&current->arch.vgic_cpu.ap_list_lock);
>> +}
>>   /*
>>    * Local variables:
>>    * mode: C
>> diff --git a/xen/arch/arm/vgic/vgic.h b/xen/arch/arm/vgic/vgic.h
>> index 5127739f0f..47fc58b81e 100644
>> --- a/xen/arch/arm/vgic/vgic.h
>> +++ b/xen/arch/arm/vgic/vgic.h
>> @@ -17,6 +17,8 @@
>>   #ifndef __XEN_ARM_VGIC_NEW_H__
>>   #define __XEN_ARM_VGIC_NEW_H__
>>   +#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
>> +
>>   static inline bool irq_is_pending(struct vgic_irq *irq)
>>   {
>>       if ( irq->config == VGIC_CONFIG_EDGE )
>>
> 
> Cheers,
> 
