x86/vMSI-X: avoid missing first unmask of vectors Recent changes to Linux result in there just being a single unmask operation prior to expecting the first interrupts to arrive. However, we've had a chicken-and-egg problem here: Qemu invokes xc_domain_update_msi_irq(), ultimately leading to msixtbl_pt_register(), upon seeing that first unmask operation. Yet for msixtbl_range() to return true (in order for msixtbl_write() to get invoked at all) msixtbl_pt_register() must have completed. Deal with this by snooping suitable writes in msixtbl_range() and triggering the invocation of msix_write_completion() from msixtbl_pt_register() when that happens in the context of a still in progress vector control field write. Note that the seemingly unrelated deletion of the redundant irq_desc->msi_desc checks in msixtbl_pt_register() is to make clear to any compiler version used that the "msi_desc" local variable isn't being used uninitialized. (Doing the same in msixtbl_pt_unregister() is just for consistency reasons.) Signed-off-by: Jan Beulich --- TODO: How to deal with REP MOVS to the MSI-X table (in msixtbl_range())? 
TODO: remove //temp-s TODO: (follow-up patch) defer and conditionalize msixtbl_init() invocation TODO: (follow-up patch) drop msixtbl_list_lock TODO: (follow-up patch) drop pci_msix_get_table_len() --- a/xen/arch/x86/hvm/vmsi.c +++ b/xen/arch/x86/hvm/vmsi.c @@ -276,6 +276,7 @@ static int msixtbl_write(struct vcpu *v, if ( !entry ) goto out; nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE; +printk("%pv: write MSI-X#%u: [%lx]=%0*lx\n", v, nr_entry, address, (int)len * 2, val);//temp offset = address & (PCI_MSIX_ENTRY_SIZE - 1); if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET ) @@ -321,7 +322,16 @@ static int msixtbl_write(struct vcpu *v, ASSERT(msi_desc == desc->msi_desc); +{//temp + bool_t h = msi_desc->msi_attrib.host_masked; + bool_t g = msi_desc->msi_attrib.guest_masked; + bool_t ha = entry->pdev->msix->host_maskall; + bool_t ga = entry->pdev->msix->guest_maskall; guest_mask_msi_irq(desc, !!(val & PCI_MSIX_VECTOR_BITMASK)); + printk("%pv: MSI-X#%u %d(%d) / %d(%d) -> %d(%d) / %d(%d)\n", v, nr_entry, h, ha, g, ga, + msi_desc->msi_attrib.host_masked, entry->pdev->msix->host_maskall, + msi_desc->msi_attrib.guest_masked, entry->pdev->msix->guest_maskall); +} unlock: spin_unlock_irqrestore(&desc->lock, flags); @@ -330,6 +340,7 @@ unlock: out: rcu_read_unlock(&msixtbl_rcu_lock); +printk("%pv: write MSI-X [%lx] -> %d\n", v, address, r);//temp return r; } @@ -341,7 +352,20 @@ static int msixtbl_range(struct vcpu *v, desc = msixtbl_addr_to_desc(msixtbl_find_entry(v, addr), addr); rcu_read_unlock(&msixtbl_rcu_lock); - return !!desc; + if ( desc ) + return 1; + + if ( (addr & (PCI_MSIX_ENTRY_SIZE - 1)) == PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET ) + { + const ioreq_t *r = &v->arch.hvm_vcpu.hvm_io.io_req; + + ASSERT(r->type == IOREQ_TYPE_COPY); + if ( r->dir == IOREQ_WRITE && r->size == 4 && !r->data_is_ptr + && !(r->data & PCI_MSIX_VECTOR_BITMASK) ) + v->arch.hvm_vcpu.hvm_io.msix_snoop_address = addr; + } + + return 0; } static const struct hvm_mmio_ops msixtbl_mmio_ops 
= { @@ -360,6 +384,7 @@ static void add_msixtbl_entry(struct dom atomic_set(&entry->refcnt, 0); entry->table_len = pci_msix_get_table_len(pdev); +WARN_ON(entry->table_len != (pdev->msix->nr_entries * PCI_MSIX_ENTRY_SIZE));//temp entry->pdev = pdev; entry->gtable = (unsigned long) gtable; @@ -410,9 +435,6 @@ int msixtbl_pt_register(struct domain *d return r; } - if ( !irq_desc->msi_desc ) - goto out; - msi_desc = irq_desc->msi_desc; if ( !msi_desc ) goto out; @@ -437,6 +459,24 @@ found: out: spin_unlock_irq(&irq_desc->lock); xfree(new_entry); + + if ( !r ) + { + struct vcpu *v; + +printk("d%d: MSI-X#%u %u@%lx\n", d->domain_id, msi_desc->msi_attrib.entry_nr, entry->table_len, gtable);//temp + for_each_vcpu ( d, v ) + { +if(v->arch.hvm_vcpu.hvm_io.msix_snoop_address)//temp + printk("%pv: MSI-X#%u snoop %08lx\n", v, msi_desc->msi_attrib.entry_nr, v->arch.hvm_vcpu.hvm_io.msix_snoop_address);//temp + if ( v->arch.hvm_vcpu.hvm_io.msix_snoop_address == + (gtable + msi_desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) ) + v->arch.hvm_vcpu.hvm_io.msix_unmask_address = + v->arch.hvm_vcpu.hvm_io.msix_snoop_address; + } + } + return r; } @@ -457,9 +497,6 @@ void msixtbl_pt_unregister(struct domain if ( !irq_desc ) return; - if ( !irq_desc->msi_desc ) - goto out; - msi_desc = irq_desc->msi_desc; if ( !msi_desc ) goto out; @@ -522,6 +559,8 @@ void msix_write_completion(struct vcpu * { unsigned long ctrl_address = v->arch.hvm_vcpu.hvm_io.msix_unmask_address; + v->arch.hvm_vcpu.hvm_io.msix_snoop_address = 0; + if ( !ctrl_address ) return; --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -85,6 +85,7 @@ struct hvm_vcpu_io { bool_t mmio_retry; unsigned long msix_unmask_address; + unsigned long msix_snoop_address; const struct g2m_ioport *g2m_ioport; };