[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 2/6] ioemu:passthrough: MSI-INTx interrupt translation support
passthrough: MSI-INTx interrupt translation support This patch enables Xen to use MSI for MSI-capable devices as the underlying interrupt source even if the guest does not explicitly use it. The guest will still see an IO-APIC based INTx interrupt translated by Xen from the MSI irq. If the guest enables MSI or MSI-X for the passthrough device, this translation is automatically turned off. It can also be disabled in the config file at domain startup time. Signed-off-by: Qing He <qing.he@xxxxxxxxx> --- diff --git a/hw/pass-through.c b/hw/pass-through.c index d280ff6..625e644 100644 --- a/hw/pass-through.c +++ b/hw/pass-through.c @@ -34,6 +34,7 @@ struct php_dev { uint8_t r_bus; uint8_t r_dev; uint8_t r_func; + char *opt; }; struct dpci_infos { @@ -492,7 +493,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = { .size = 2, .init_val = 0x0000, .ro_mask = 0x018E, - .emu_mask = 0xFFFE, + .emu_mask = 0xFFFF, .init = pt_msgctrl_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_msgctrl_reg_write, @@ -692,7 +693,7 @@ static int token_value(char *token) return strtol(token, NULL, 16); } -static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func) +static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func, char **opt) { char *token; const char *delim = ":.-"; @@ -711,18 +712,60 @@ static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func) *dev = token_value(token); token = strsep(str, delim); + *opt = strchr(token, ','); + if (*opt) + *(*opt)++ = '\0'; + *func = token_value(token); return 1; } +static int get_next_keyval(char **option, char **key, char **val) +{ + char *opt, *k, *v; + + k = *option; + opt = strchr(k, ','); + if (opt) + *opt++ = '\0'; + v = strchr(k, '='); + if (!v) + return -1; + *v++ = '\0'; + + *key = k; + *val = v; + *option = opt; + + return 0; +} + +static void msi_set_enable(struct pt_dev *ptdev, int en) +{ + uint16_t val; + uint32_t address; + if (!ptdev->msi) + return; + + address = ptdev->msi->ctrl_offset; + if (!address) + return; + + val = pci_read_word(ptdev->pci_dev, address); + val &= ~PCI_MSI_FLAGS_ENABLE; + val |= en & PCI_MSI_FLAGS_ENABLE; + pci_write_word(ptdev->pci_dev, address, val); +} + /* Insert a new pass-through device into a specific pci slot. * input dom:bus:dev.func@slot, chose free one if slot == 0 * return -1: required slot not available * 0: no free hotplug slots, but normal slot should okay * >0: the new hotplug slot */ -static int __insert_to_pci_slot(int bus, int dev, int func, int slot) +static int __insert_to_pci_slot(int bus, int dev, int func, int slot, + char *opt) { int i, php_slot; @@ -759,6 +802,7 @@ found: dpci_infos.php_devs[php_slot].r_bus = bus; dpci_infos.php_devs[php_slot].r_dev = dev; dpci_infos.php_devs[php_slot].r_func = func; + dpci_infos.php_devs[php_slot].opt = opt; return PHP_TO_PCI_SLOT(php_slot); } @@ -768,19 +812,19 @@ found: int insert_to_pci_slot(char *bdf_slt) { int seg, bus, dev, func, slot; - char *bdf_str, *slt_str; + char *bdf_str, *slt_str, *opt; const char *delim="@"; bdf_str = strsep(&bdf_slt, delim); slt_str = bdf_slt; slot = token_value(slt_str); - if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func)) + if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func, &opt)) { return -1; } - return __insert_to_pci_slot(bus, dev, func, slot); + return __insert_to_pci_slot(bus, dev, func, slot, opt); } @@ -807,8 +851,9 @@ int test_pci_slot(int slot) int bdf_to_slot(char *bdf_str) { int seg, bus, dev, func, i; + char *opt; - if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func)) + if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func, &opt)) { return -1; } @@ -1960,9 +2005,15 @@ static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev, pci_write_word(pdev, real_offset, reg_field & ~PCI_MSI_FLAGS_ENABLE); } ptdev->msi->flags |= (reg_field | MSI_FLAG_UNINIT); + ptdev->msi->ctrl_offset = real_offset; /* All register is 0 after reset, except first 4 byte */ reg_field &= reg->ro_mask; + + if (ptdev->msi_trans_cap) { + PT_LOG("Turning on MSI-INTx translation\n"); + ptdev->msi_trans_en = 1; + } return reg_field; } @@ -2673,6 +2724,34 @@ static int pt_linkctrl2_reg_write(struct pt_dev *ptdev, return 0; } +static void pt_unmap_msi_translate(struct pt_dev *ptdev) +{ + uint16_t e_device, e_intx; + int rc; + + /* MSI_ENABLE bit should be disabed until the new handler is set */ + msi_set_enable(ptdev, 0); + + e_device = (ptdev->dev.devfn >> 3) & 0x1f; + /* fix virtual interrupt pin to INTA# */ + e_intx = 0; + rc = xc_domain_unbind_pt_irq(xc_handle, domid, ptdev->msi->pirq, + PT_IRQ_TYPE_MSI_TRANSLATE, 0, + e_device, e_intx, 0); + if (rc < 0) + PT_LOG("Error: Unbinding pt irq for MSI-INTx failed! rc=%d\n", rc); + + if (ptdev->machine_irq) + { + rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, ptdev->machine_irq, + 0, e_device, e_intx); + if ( rc < 0 ) + PT_LOG("Error: Rebinding of interrupt failed! rc=%d\n", rc); + } + + ptdev->msi_trans_en = 0; +} + /* write Message Control register */ static int pt_msgctrl_reg_write(struct pt_dev *ptdev, struct pt_reg_tbl *cfg_entry, @@ -2682,7 +2761,9 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev, uint16_t writable_mask = 0; uint16_t throughable_mask = 0; uint16_t old_ctrl = cfg_entry->data; + uint8_t e_device, e_intx; PCIDevice *pd = (PCIDevice *)ptdev; + uint16_t val; /* Currently no support for multi-vector */ if ((*value & PCI_MSI_FLAGS_QSIZE) != 0x0) @@ -2699,21 +2780,29 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev, PT_LOG("old_ctrl:%04xh new_ctrl:%04xh\n", old_ctrl, cfg_entry->data); /* create value for writing to I/O device register */ + val = *value; throughable_mask = ~reg->emu_mask & valid_mask; *value = ((*value & throughable_mask) | (dev_value & ~throughable_mask)); /* update MSI */ - if (*value & PCI_MSI_FLAGS_ENABLE) + if (val & PCI_MSI_FLAGS_ENABLE) { /* setup MSI pirq for the first time */ if (ptdev->msi->flags & MSI_FLAG_UNINIT) { - /* Init physical one */ - PT_LOG("setup msi for dev %x\n", pd->devfn); - if (pt_msi_setup(ptdev)) + if (ptdev->msi_trans_en) { + PT_LOG("guest enabling MSI, disable MSI-INTx translation\n"); + pt_unmap_msi_translate(ptdev); + } + else { - PT_LOG("pt_msi_setup error!!!\n"); - return -1; + /* Init physical one */ + PT_LOG("setup msi for dev %x\n", pd->devfn); + if (pt_msi_setup(ptdev)) + { + PT_LOG("pt_msi_setup error!!!\n"); + return -1; + } } pt_msi_update(ptdev); @@ -2725,6 +2814,12 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev, else ptdev->msi->flags &= ~PCI_MSI_FLAGS_ENABLE; + /* pass through MSI_ENABLE bit when no MSI-INTx translation */ + if (!ptdev->msi_trans_en) { + *value &= ~PCI_MSI_FLAGS_ENABLE; + *value |= val & PCI_MSI_FLAGS_ENABLE; + } + return 0; } @@ -2870,7 +2965,13 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev, /* update MSI-X */ if ((*value & PCI_MSIX_ENABLE) && !(*value & PCI_MSIX_MASK)) + { + if (ptdev->msi_trans_en) { + PT_LOG("guest enabling MSI-X, disable MSI-INTx translation\n"); + pt_unmap_msi_translate(ptdev); + } pt_msix_update(ptdev); + } ptdev->msix->enabled = !!(*value & PCI_MSIX_ENABLE); @@ -2879,7 +2980,8 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev, struct pt_dev * register_real_device(PCIBus *e_bus, const char *e_dev_name, int e_devfn, uint8_t r_bus, uint8_t r_dev, - uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access) + uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access, + char *opt) { int rc = -1, i; struct pt_dev *assigned_device = NULL; @@ -2887,6 +2989,8 @@ struct pt_dev * register_real_device(PCIBus *e_bus, uint8_t e_device, e_intx; struct pci_config_cf8 machine_bdf; int free_pci_slot = -1; + char *key, *val; + int msi_translate; PT_LOG("Assigning real physical device %02x:%02x.%x ...\n", r_bus, r_dev, r_func); @@ -2908,13 +3012,41 @@ struct pt_dev * register_real_device(PCIBus *e_bus, if ( e_devfn == PT_VIRT_DEVFN_AUTO ) { /*indicate a static assignment(not hotplug), so find a free PCI hot plug slot */ - free_pci_slot = __insert_to_pci_slot(r_bus, r_dev, r_func, 0); + free_pci_slot = __insert_to_pci_slot(r_bus, r_dev, r_func, 0, NULL); if ( free_pci_slot > 0 ) e_devfn = free_pci_slot << 3; else PT_LOG("Error: no free virtual PCI hot plug slot, thus no live migration.\n"); } + msi_translate = direct_pci_msitranslate; + while (opt) { + if (get_next_keyval(&opt, &key, &val)) { + PT_LOG("Error: unrecognized PCI assignment option \"%s\"\n", opt); + break; + } + + if (strcmp(key, "msitranslate") == 0) + { + if (strcmp(val, "0") == 0 || strcmp(val, "no") == 0) + { + PT_LOG("Disable MSI translation via per device option\n"); + msi_translate = 0; + } + else if (strcmp(val, "1") == 0 || strcmp(val, "yes") == 0) + { + PT_LOG("Enable MSI translation via per device option\n"); + msi_translate = 1; + } + else + PT_LOG("Error: unrecognized value for msitranslate=\n"); + } + else + PT_LOG("Error: unrecognized PCI assignment option \"%s=%s\"\n", key, val); + + } + + /* Register device */ assigned_device = (struct pt_dev *) pci_register_device(e_bus, e_dev_name, sizeof(struct pt_dev), e_devfn, @@ -2929,6 +3061,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus, dpci_infos.php_devs[PCI_TO_PHP_SLOT(free_pci_slot)].pt_dev = assigned_device; assigned_device->pci_dev = pci_dev; + assigned_device->msi_trans_cap = msi_translate; /* Assign device */ machine_bdf.reg = 0; @@ -2960,6 +3093,28 @@ struct pt_dev * register_real_device(PCIBus *e_bus, /* fix virtual interrupt pin to INTA# */ e_intx = 0; + while (assigned_device->msi_trans_en) + { + if (pt_msi_setup(assigned_device)) + { + PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n"); + assigned_device->msi_trans_en = 0; + break; + } + + rc = xc_domain_bind_pt_irq(xc_handle, domid, assigned_device->msi->pirq, + PT_IRQ_TYPE_MSI_TRANSLATE, 0, + e_device, e_intx, 0); + if ( rc < 0) + { + PT_LOG("Error: MSI-INTx translation bind failed, fallback\n"); + assigned_device->msi_trans_en = 0; + break; + } + msi_set_enable(assigned_device, 1); + break; + } + if ( PT_MACHINE_IRQ_AUTO == machine_irq ) { int pirq = pci_dev->irq; @@ -2973,9 +3125,15 @@ struct pt_dev * register_real_device(PCIBus *e_bus, PT_LOG("Error: Mapping irq failed, rc = %d\n", rc); } else + { machine_irq = pirq; + assigned_device->machine_irq = pirq; + } } + if (assigned_device->msi_trans_en) + goto out; + /* bind machine_irq to device */ if ( 0 != machine_irq ) { @@ -2995,8 +3153,9 @@ struct pt_dev * register_real_device(PCIBus *e_bus, } out: - PT_LOG("Real physical device %02x:%02x.%x registered successfuly!\n", - r_bus, r_dev, r_func); + PT_LOG("Real physical device %02x:%02x.%x registered successfuly!\n" + "IRQ type = %s\n", r_bus, r_dev, r_func, + assigned_device->msi_trans_en? "MSI-INTx":"INTx"); return assigned_device; } @@ -3029,9 +3188,9 @@ int unregister_real_device(int php_slot) e_device = (assigned_device->dev.devfn >> 3) & 0x1f; /* fix virtual interrupt pin to INTA# */ e_intx = 0; - machine_irq = pci_dev->irq; + machine_irq = assigned_device->machine_irq; - if ( machine_irq != 0 ) { + if ( assigned_device->msi_trans_en == 0 && machine_irq ) { rc = xc_domain_unbind_pt_irq(xc_handle, domid, machine_irq, PT_IRQ_TYPE_PCI, 0, e_device, e_intx, 0); if ( rc < 0 ) @@ -3040,6 +3199,16 @@ int unregister_real_device(int php_slot) PT_LOG("Error: Unbinding of interrupt failed! rc=%d\n", rc); } } + else if (assigned_device->msi_trans_en) + { + rc = xc_domain_unbind_pt_irq(xc_handle, domid, assigned_device->msi->pirq, + PT_IRQ_TYPE_MSI_TRANSLATE, 0, + e_device, e_intx, 0); + if (rc < 0) + PT_LOG("Error: Unbinding pt irq for MSI-INTx failed! rc=%d\n", rc); + } + + /* TODO: unmap passthrough MSI and MSI-X irqs */ /* delete all emulated config registers */ pt_config_delete(assigned_device); @@ -3075,7 +3244,10 @@ int power_on_php_slot(int php_slot) php_dev->r_dev, php_dev->r_func, PT_MACHINE_IRQ_AUTO, - dpci_infos.pci_access); + dpci_infos.pci_access, + php_dev->opt); + + php_dev->opt = NULL; php_dev->pt_dev = pt_dev; @@ -3097,6 +3269,7 @@ int pt_init(PCIBus *e_bus, const char *direct_pci) char slot_str[8]; char *direct_pci_head = NULL; char *direct_pci_p = NULL; + char *opt; /* Initialize libpci */ pci_access = pci_alloc(); @@ -3125,11 +3298,11 @@ int pt_init(PCIBus *e_bus, const char *direct_pci) vslots = qemu_mallocz ( strlen(direct_pci) / 3 ); /* Assign given devices to guest */ - while ( next_bdf(&direct_pci_p, &seg, &b, &d, &f) ) + while ( next_bdf(&direct_pci_p, &seg, &b, &d, &f, &opt) ) { /* Register real device with the emulated bus */ pt_dev = register_real_device(e_bus, "DIRECT PCI", PT_VIRT_DEVFN_AUTO, - b, d, f, PT_MACHINE_IRQ_AUTO, pci_access); + b, d, f, PT_MACHINE_IRQ_AUTO, pci_access, opt); if ( pt_dev == NULL ) { PT_LOG("Error: Registration failed (%02x:%02x.%x)\n", b, d, f); diff --git a/hw/pass-through.h b/hw/pass-through.h index 8aa664b..a7d2727 100644 --- a/hw/pass-through.h +++ b/hw/pass-through.h @@ -121,6 +121,7 @@ struct pt_region { struct pt_msi_info { uint32_t flags; + uint32_t ctrl_offset; /* saved control offset */ int pirq; /* guest pirq corresponding */ uint32_t addr_lo; /* guest message address */ uint32_t addr_hi; /* guest message upper address */ @@ -158,6 +159,10 @@ struct pt_dev { /* emul reg group list */ struct pt_msi_info *msi; /* MSI virtualization */ struct pt_msix_info *msix; /* MSI-X virtualization */ + int machine_irq; /* saved pirq */ + /* Physical MSI to guest INTx translation when possible */ + int msi_trans_cap; + int msi_trans_en; }; /* Used for formatting PCI BDF into cf8 format */ diff --git a/hw/pci.h b/hw/pci.h index 4adc4d7..a527a39 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -64,6 +64,7 @@ struct PCIDevice { }; extern char direct_pci_str[]; +extern int direct_pci_msitranslate; PCIDevice *pci_register_device(PCIBus *bus, const char *name, int instance_size, int devfn, diff --git a/xenstore.c b/xenstore.c index 86e8b63..ff3d023 100644 --- a/xenstore.c +++ b/xenstore.c @@ -290,8 +290,10 @@ const char *xenstore_get_guest_uuid(void) { #endif } -#define DIRECT_PCI_STR_LEN 160 +#define DIRECT_PCI_STR_LEN 512 +#define PT_PCI_MSITRANSLATE_DEFAULT 1 char direct_pci_str[DIRECT_PCI_STR_LEN]; +int direct_pci_msitranslate; void xenstore_parse_domain_config(int hvm_domid) { char **e_danger = NULL; @@ -556,20 +558,50 @@ void xenstore_parse_domain_config(int hvm_domid) free(dev); dev = xs_read(xsh, XBT_NULL, buf, &len); - if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN ) { + if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN - 1) { fprintf(stderr, "qemu: too many pci pass-through devices\n"); memset(direct_pci_str, 0, DIRECT_PCI_STR_LEN); goto out; } + /* append to direct_pci_str */ + if ( !dev ) + continue; + + strcat(direct_pci_str, dev); + + if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/opts-%d", + hvm_domid, pci_devid, i) != -1) { + free(dev); + dev = xs_read(xsh, XBT_NULL, buf, &len); + } if ( dev ) { + if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN - 2) { + fprintf(stderr, "qemu: too many pci pass-through devices\n"); + memset(direct_pci_str, 0, DIRECT_PCI_STR_LEN); + goto out; + } + strcat(direct_pci_str, ","); strcat(direct_pci_str, dev); - strcat(direct_pci_str, "-"); } + + strcat(direct_pci_str, "-"); } } + /* get the pci pass-through parameter */ + if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/msitranslate", + hvm_domid, pci_devid) == -1) + goto out; + + free(params); + params = xs_read(xsh, XBT_NULL, buf, &len); + if (params) + direct_pci_msitranslate = atoi(params); + else + direct_pci_msitranslate = PT_PCI_MSITRANSLATE_DEFAULT; + out: free(danger_type); free(params); _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |