On 11/11/13 12:33, Wu, Feng wrote:
Patch #1: For Xen itself
From 5e89c4a3f16d3556294f276039683dcf8abaeb84 Mon Sep 17 00:00:00 2001
From: Feng Wu <feng.wu@xxxxxxxxx>
Date: Fri, 1 Nov 2013 00:51:46 -0400
Subject: [PATCH] x86: properly handle MSI-X unmask operation from guests
For a pass-through device with MSI-x capability, when guest tries
to unmask the MSI-x interrupt for the passed through device, xen
doesn't clear the mask bit for MSI-x in hardware in the following
scenario, which will cause network disconnection:
1. Guest masks the MSI-x interrupt
2. Guest updates the address and data for it
3. Guest unmasks the MSI-x interrupt (This is the problematic step)
In the step #3 above, Xen doesn't handle it well. When guest tries
to unmask MSI-X interrupt, it traps to Xen, Xen just returns to Qemu
if it notices that address or data has been modified by guest before,
then Qemu will update Xen with the latest value of address/data by
hypercall. However, in this whole process, the MSI-X interrupt unmask
operation is missing, which means Xen doesn't clear the mask bit in
hardware for the MSI-X interrupt, so it remains disabled, that is why
it loses the network connection.
This patch fixes this issue.
Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
---
tools/libxc/xc_domain.c | 4 ++++
tools/libxc/xenctrl.h | 2 ++
xen/arch/x86/hvm/vmsi.c | 14 ++++++++++++++
xen/drivers/passthrough/io.c | 7 +++++++
xen/include/public/domctl.h | 2 ++
xen/include/xen/pci.h | 2 ++
6 files changed, 31 insertions(+), 0 deletions(-)
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 2cea6e3..d8e583e 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1342,6 +1342,8 @@ int xc_domain_update_msi_irq(
uint32_t gvec,
uint32_t pirq,
uint32_t gflags,
+ uint32_t vector_ctrl,
+ int nr_entry,
uint64_t gtable)
{
int rc;
@@ -1359,6 +1361,8 @@ int xc_domain_update_msi_irq(
bind->u.msi.gvec = gvec;
bind->u.msi.gflags = gflags;
bind->u.msi.gtable = gtable;
+ bind->u.msi.vector_ctrl = vector_ctrl;
+ bind->u.msi.nr_entry = nr_entry;
rc = do_domctl(xch, &domctl);
return rc;
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 8cf3f3b..9b972b3 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1781,6 +1781,8 @@ int xc_domain_update_msi_irq(
uint32_t gvec,
uint32_t pirq,
uint32_t gflags,
+ uint32_t vector_ctrl,
+ int nr_entry,
uint64_t gtable);
int xc_domain_unbind_msi_irq(xc_interface *xch,
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index 4826b4a..5cee099 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -528,3 +528,17 @@ void msixtbl_pt_cleanup(struct domain *d)
spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
local_irq_restore(flags);
}
+
+int msixtbl_unmask(struct vcpu *v, unsigned long table_base,
+ unsigned int nr_entry)
+{
+ unsigned long ctrl_address;
+
+ ctrl_address = table_base + nr_entry * PCI_MSIX_ENTRY_SIZE +
+ PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+
+ if (msixtbl_write(v, ctrl_address, 4, 0) != X86EMUL_OKAY )
+ return -1;
+
+ return 0;
+}
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index f64e4ac..8039b72 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -193,6 +193,13 @@ int pt_irq_create_bind(
spin_unlock(&d->event_lock);
if ( dest_vcpu_id >= 0 )
hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
+
+ if ((pt_irq_bind->u.msi.vector_ctrl & PCI_MSIX_VECTOR_BITMASK) == 0) {
+ rc = msixtbl_unmask(d->vcpu[0], pt_irq_bind->u.msi.gtable,
+ pt_irq_bind->u.msi.nr_entry);
+ if (rc)
+ return -EBUSY;
+ }
}
else
{
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index d4e479f..7a31c28 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -503,6 +503,8 @@ struct xen_domctl_bind_pt_irq {
struct {
uint8_t gvec;
uint32_t gflags;
+ uint32_t vector_ctrl;
+ int nr_entry;
uint64_aligned_t gtable;
} msi;
} u;
NACK - you absolutely cannot change the ABI like this.
At the very least, you must bump XEN_DOMCTL_INTERFACE_VERSION.
~Andrew
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index cadb525..1eec625 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -147,5 +147,7 @@ struct pirq;
int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable);
void msixtbl_pt_unregister(struct domain *, struct pirq *);
void msixtbl_pt_cleanup(struct domain *d);
+int msixtbl_unmask(struct vcpu *v, unsigned long table_base,
+ unsigned int nr_entry);
#endif /* __XEN_PCI_H__ */
--
1.7.1
Patch #2: For Qemu-xen:
From 0c3d2e73b06b7ccf357163c75dedf537897c6a21 Mon Sep 17 00:00:00 2001
From: Feng Wu <feng.wu@xxxxxxxxx>
Date: Wed, 6 Nov 2013 01:26:58 -0500
Subject: [PATCH] x86: properly handle MSI-X unmask operation from guests
For a pass-through device with MSI-x capability, when guest tries
to unmask the MSI-x interrupt for the passed through device, xen
doesn't clear the mask bit for MSI-x in hardware in the following
scenario, which will cause network disconnection:
1. Guest masks the MSI-x interrupt
2. Guest updates the address and data for it
3. Guest unmasks the MSI-x interrupt (This is the problematic step)
In the step #3 above, Xen doesn't handle it well. When guest tries
to unmask MSI-X interrupt, it traps to Xen, Xen just returns to Qemu
if it notices that address or data has been modified by guest before,
then Qemu will update Xen with the latest value of address/data by
hypercall. However, in this whole process, the MSI-X interrupt unmask
operation is missing, which means Xen doesn't clear the mask bit in
hardware for the MSI-X interrupt, so it remains disabled, that is why
it loses the network connection.
This patch fixes this issue.
Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
---
hw/xen_pt_msi.c | 37 +++++++++++++++++++++++--------------
1 files changed, 23 insertions(+), 14 deletions(-)
diff --git a/hw/xen_pt_msi.c b/hw/xen_pt_msi.c
index db757cd..bacf2e9 100644
--- a/hw/xen_pt_msi.c
+++ b/hw/xen_pt_msi.c
@@ -143,6 +143,7 @@ static int msi_msix_setup(XenPCIPassthroughState *s,
static int msi_msix_update(XenPCIPassthroughState *s,
uint64_t addr,
uint32_t data,
+ uint32_t vector_ctrl,
int pirq,
bool is_msix,
int msix_entry,
@@ -162,8 +163,8 @@ static int msi_msix_update(XenPCIPassthroughState *s,
table_addr = s->msix->mmio_base_addr;
}
- rc = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec,
- pirq, gflags, table_addr);
+ rc = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec, pirq, gflags,
+ vector_ctrl, msix_entry, table_addr);
if (rc) {
XEN_PT_ERR(d, "Updating of MSI%s failed. (rc: %d)\n",
@@ -264,7 +265,7 @@ int xen_pt_msi_setup(XenPCIPassthroughState *s)
int xen_pt_msi_update(XenPCIPassthroughState *s)
{
XenPTMSI *msi = s->msi;
- return msi_msix_update(s, msi_addr64(msi), msi->data, msi->pirq,
+ return msi_msix_update(s, msi_addr64(msi), msi->data, 1, msi->pirq,
false, 0, &msi->pirq);
}
@@ -330,8 +331,8 @@ static int xen_pt_msix_update_one(XenPCIPassthroughState *s, int entry_nr)
entry->pirq = pirq;
}
- rc = msi_msix_update(s, entry->addr, entry->data, pirq, true,
- entry_nr, &entry->pirq);
+ rc = msi_msix_update(s, entry->addr, entry->data, entry->vector_ctrl, pirq,
+ true, entry_nr, &entry->pirq);
if (!rc) {
entry->updated = false;
@@ -434,6 +435,7 @@ static void pci_msix_write(void *opaque, hwaddr addr,
XenPTMSIX *msix = s->msix;
XenPTMSIXEntry *entry;
int entry_nr, offset;
+ const volatile uint32_t *vec_ctrl;
entry_nr = addr / PCI_MSIX_ENTRY_SIZE;
if (entry_nr < 0 || entry_nr >= msix->total_entries) {
@@ -443,20 +445,18 @@ static void pci_msix_write(void *opaque, hwaddr addr,
entry = &msix->msix_entry[entry_nr];
offset = addr % PCI_MSIX_ENTRY_SIZE;
- if (offset != PCI_MSIX_ENTRY_VECTOR_CTRL) {
- const volatile uint32_t *vec_ctrl;
+ /*
+ * If Xen intercepts the mask bit access, entry->vec_ctrl may not be
+ * up-to-date. Read from hardware directly.
+ */
+ vec_ctrl = s->msix->phys_iomem_base + entry_nr * PCI_MSIX_ENTRY_SIZE
+ + PCI_MSIX_ENTRY_VECTOR_CTRL;
+ if (offset != PCI_MSIX_ENTRY_VECTOR_CTRL) {
if (get_entry_value(entry, offset) == val) {
return;
}
- /*
- * If Xen intercepts the mask bit access, entry->vec_ctrl may not be
- * up-to-date. Read from hardware directly.
- */
- vec_ctrl = s->msix->phys_iomem_base + entry_nr * PCI_MSIX_ENTRY_SIZE
- + PCI_MSIX_ENTRY_VECTOR_CTRL;
-
if (msix->enabled && !(*vec_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
XEN_PT_ERR(&s->dev, "Can't update msix entry %d since MSI-X is"
" already enabled.\n", entry_nr);
@@ -469,6 +469,15 @@ static void pci_msix_write(void *opaque, hwaddr addr,
set_entry_value(entry, offset, val);
if (offset == PCI_MSIX_ENTRY_VECTOR_CTRL) {
+
+ /*
+ * If guest unmasks the MSI-X interrupt, we also need to
+ * notify Xen to update it.
+ */
+ if ((val & PCI_MSIX_ENTRY_CTRL_MASKBIT) !=
+ (*vec_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT))
+ entry->updated = true;
+
if (msix->enabled && !(val & PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
xen_pt_msix_update_one(s, entry_nr);
}
--
1.7.1
Patch #3: For Qemu-traditional:
From 9c0cb506a10ce4588b5fc048f4be2b9997cb0c75 Mon Sep 17 00:00:00 2001
From: Feng Wu <feng.wu@xxxxxxxxx>
Date: Fri, 1 Nov 2013 00:52:48 -0400
Subject: [PATCH] x86: properly handle MSI-X unmask operation from guests
For a pass-through device with MSI-x capability, when guest tries
to unmask the MSI-x interrupt for the passed through device, xen
doesn't clear the mask bit for MSI-x in hardware in the following
scenario, which will cause network disconnection:
1. Guest masks the MSI-x interrupt
2. Guest updates the address and data for it
3. Guest unmasks the MSI-x interrupt (This is the problematic step)
In the step #3 above, Xen doesn't handle it well. When guest tries
to unmask MSI-X interrupt, it traps to Xen, Xen just returns to Qemu
if it notices that address or data has been modified by guest before,
then Qemu will update Xen with the latest value of address/data by
hypercall. However, in this whole process, the MSI-X interrupt unmask
operation is missing, which means Xen doesn't clear the mask bit in
hardware for the MSI-X interrupt, so it remains disabled, that is why
it loses the network connection.
This patch fixes this issue.
Signed-off-by: Feng Wu <feng.wu@xxxxxxxxx>
---
hw/pt-msi.c | 27 ++++++++++++++++++---------
1 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/hw/pt-msi.c b/hw/pt-msi.c
index b03b989..64a2531 100644
--- a/hw/pt-msi.c
+++ b/hw/pt-msi.c
@@ -142,7 +142,7 @@ int pt_msi_update(struct pt_dev *d)
d->msi->pirq, gvec, gflags);
ret = xc_domain_update_msi_irq(xc_handle, domid, gvec,
- d->msi->pirq, gflags, 0);
+ d->msi->pirq, gflags, 1, 0, 0);
if (ret)
{
@@ -281,6 +281,7 @@ static int pt_msix_update_one(struct pt_dev *dev, int entry_nr)
int gvec = entry->io_mem[2] & 0xff;
uint64_t gaddr = *(uint64_t *)&entry->io_mem[0];
uint32_t gflags = __get_msi_gflags(entry->io_mem[2], gaddr);
+ uint32_t vector_ctrl = entry->io_mem[3];
int ret;
if ( !entry->flags )
@@ -319,6 +320,7 @@ static int pt_msix_update_one(struct pt_dev *dev, int entry_nr)
entry_nr, pirq, gvec);
ret = xc_domain_update_msi_irq(xc_handle, domid, gvec, pirq, gflags,
+ vector_ctrl, entry_nr,
dev->msix->mmio_base_addr);
if ( ret )
{
@@ -431,6 +433,7 @@ static void pci_msix_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
struct pt_msix_info *msix = dev->msix;
struct msix_entry_info *entry;
int entry_nr, offset;
+ const volatile uint32_t *vec_ctrl;
if ( addr % 4 )
{
@@ -443,19 +446,17 @@ static void pci_msix_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
entry = &msix->msix_entry[entry_nr];
offset = ((addr - msix->mmio_base_addr) % 16) / 4;
+ /*
+ * If Xen intercepts the mask bit access, io_mem[3] may not be
+ * up-to-date. Read from hardware directly.
+ */
+ vec_ctrl = msix->phys_iomem_base + 16 * entry_nr + 12;
+
if ( offset != 3 )
{
- const volatile uint32_t *vec_ctrl;
-
if ( entry->io_mem[offset] == val )
return;
- /*
- * If Xen intercepts the mask bit access, io_mem[3] may not be
- * up-to-date. Read from hardware directly.
- */
- vec_ctrl = msix->phys_iomem_base + 16 * entry_nr + 12;
-
if ( msix->enabled && !(*vec_ctrl & 0x1) )
{
PT_LOG("Can't update entry %d since MSI-X is already enabled"
@@ -471,6 +472,14 @@ static void pci_msix_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
if ( offset == 3 )
{
+
+ /*
+ * If guest unmasks the MSI-X interrupt, we also need to
+ * notify Xen to update it.
+ */
+ if ((val & 0x1) != (*vec_ctrl & 0x1))
+ entry->flags = 1;
+
if ( msix->enabled && !(val & 0x1) )
pt_msix_update_one(dev, entry_nr);
}
--
1.7.1
Thanks,
Feng
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|