[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 2/2] ioemu: Enable guest OS to program D0-D3hot states of an assigned device
This patch enables guest OS to program D0-D3hot states of assigned device. Thanks, -- Yuji Shimada. Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx> diff --git a/hw/pass-through.c b/hw/pass-through.c index e76a3c3..ca9037d 100644 --- a/hw/pass-through.c +++ b/hw/pass-through.c @@ -27,6 +27,7 @@ #include "pci/pci.h" #include "pt-msi.h" #include "qemu-xen.h" +#include <unistd.h> struct php_dev { struct pt_dev *pt_dev; @@ -60,6 +61,10 @@ static uint32_t pt_irqpin_reg_init(struct pt_dev *ptdev, struct pt_reg_info_tbl *reg, uint32_t real_offset); static uint32_t pt_bar_reg_init(struct pt_dev *ptdev, struct pt_reg_info_tbl *reg, uint32_t real_offset); +static uint32_t pt_pmc_reg_init(struct pt_dev *ptdev, + struct pt_reg_info_tbl *reg, uint32_t real_offset); +static uint32_t pt_pmcsr_reg_init(struct pt_dev *ptdev, + struct pt_reg_info_tbl *reg, uint32_t real_offset); static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev, struct pt_reg_info_tbl *reg, uint32_t real_offset); static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev, @@ -78,6 +83,8 @@ static uint32_t pt_msixctrl_reg_init(struct pt_dev *ptdev, struct pt_reg_info_tbl *reg, uint32_t real_offset); static uint8_t pt_reg_grp_size_init(struct pt_dev *ptdev, struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset); +static uint8_t pt_pm_size_init(struct pt_dev *ptdev, + struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset); static uint8_t pt_msi_size_init(struct pt_dev *ptdev, struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset); static uint8_t pt_msix_size_init(struct pt_dev *ptdev, @@ -146,6 +153,24 @@ static int pt_msgdata_reg_write(struct pt_dev *ptdev, static int pt_msixctrl_reg_write(struct pt_dev *ptdev, struct pt_reg_tbl *cfg_entry, uint16_t *value, uint16_t dev_value, uint16_t valid_mask); +static int pt_byte_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint8_t dev_value, uint8_t *value); +static int pt_word_reg_restore(struct pt_dev 
*ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint16_t dev_value, uint16_t *value); +static int pt_long_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint32_t dev_value, uint32_t *value); +static int pt_cmd_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint16_t dev_value, uint16_t *value); +static int pt_pmcsr_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint16_t dev_value, uint16_t *value); +static int pt_bar_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint32_t dev_value, uint32_t *value); /* pt_reg_info_tbl declaration * - only for emulated register (either a part or whole bit). @@ -166,6 +191,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_vendor_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, }, /* Device ID reg */ { @@ -177,6 +203,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_device_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, }, /* Command reg */ { @@ -188,6 +215,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_common_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_cmd_reg_write, + .u.w.restore = pt_cmd_reg_restore, }, /* Capabilities Pointer reg */ { @@ -199,6 +227,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_ptr_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, /* Status reg */ /* use emulated Cap Ptr value to initialize, @@ -213,6 +242,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_status_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, }, /* Cache Line Size reg */ { @@ -224,6 +254,7 @@ static struct pt_reg_info_tbl 
pt_emu_reg_header0_tbl[] = { .init = pt_common_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = pt_byte_reg_restore, }, /* Latency Timer reg */ { @@ -235,6 +266,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_common_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = pt_byte_reg_restore, }, /* Header Type reg */ { @@ -246,6 +278,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_common_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, /* Interrupt Line reg */ { @@ -257,6 +290,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_common_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, /* Interrupt Pin reg */ { @@ -268,6 +302,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_irqpin_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, /* BAR 0 reg */ /* mask of BAR need to be decided later, depends on IO/MEM type */ @@ -278,6 +313,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_bar_reg_init, .u.dw.read = pt_bar_reg_read, .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, }, /* BAR 1 reg */ { @@ -287,6 +323,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_bar_reg_init, .u.dw.read = pt_bar_reg_read, .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, }, /* BAR 2 reg */ { @@ -296,6 +333,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_bar_reg_init, .u.dw.read = pt_bar_reg_read, .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, }, /* BAR 3 reg */ { @@ -305,6 +343,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_bar_reg_init, .u.dw.read = pt_bar_reg_read, .u.dw.write = pt_bar_reg_write, + .u.dw.restore = 
pt_bar_reg_restore, }, /* BAR 4 reg */ { @@ -314,6 +353,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_bar_reg_init, .u.dw.read = pt_bar_reg_read, .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, }, /* BAR 5 reg */ { @@ -323,6 +363,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_bar_reg_init, .u.dw.read = pt_bar_reg_read, .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, }, /* Expansion ROM BAR reg */ { @@ -334,6 +375,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = { .init = pt_bar_reg_init, .u.dw.read = pt_long_reg_read, .u.dw.write = pt_exp_rom_bar_reg_write, + .u.dw.restore = pt_long_reg_restore, }, { .size = 0, @@ -352,6 +394,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = { .init = pt_ptr_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, /* Power Management Capabilities reg */ { @@ -359,10 +402,11 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = { .size = 2, .init_val = 0x0000, .ro_mask = 0xFFFF, - .emu_mask = 0xFFE8, - .init = pt_common_reg_init, + .emu_mask = 0xF9C8, + .init = pt_pmc_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, }, /* PCI Power Management Control/Status reg */ { @@ -370,21 +414,11 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = { .size = 2, .init_val = 0x0008, .ro_mask = 0x60FC, - .emu_mask = 0xFF0B, - .init = pt_common_reg_init, + .emu_mask = 0x8100, + .init = pt_pmcsr_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_pmcsr_reg_write, - }, - /* Data reg */ - { - .offset = PCI_PM_DATA_REGISTER, - .size = 1, - .init_val = 0x00, - .ro_mask = 0xFF, - .emu_mask = 0xFF, - .init = pt_common_reg_init, - .u.b.read = pt_byte_reg_read, - .u.b.write = pt_byte_reg_write, + .u.w.restore = pt_pmcsr_reg_restore, }, { .size = 0, @@ -403,6 +437,7 @@ static struct pt_reg_info_tbl pt_emu_reg_vpd_tbl[] = { .init = pt_ptr_reg_init, 
.u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, { .size = 0, @@ -421,6 +456,7 @@ static struct pt_reg_info_tbl pt_emu_reg_vendor_tbl[] = { .init = pt_ptr_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, { .size = 0, @@ -439,6 +475,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = { .init = pt_ptr_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, /* Device Capabilities reg */ { @@ -450,6 +487,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = { .init = pt_common_reg_init, .u.dw.read = pt_long_reg_read, .u.dw.write = pt_long_reg_write, + .u.dw.restore = NULL, }, /* Device Control reg */ { @@ -461,6 +499,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = { .init = pt_common_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_devctrl_reg_write, + .u.w.restore = pt_word_reg_restore, }, /* Link Control reg */ { @@ -472,6 +511,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = { .init = pt_linkctrl_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_linkctrl_reg_write, + .u.w.restore = pt_word_reg_restore, }, /* Device Control 2 reg */ { @@ -483,6 +523,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = { .init = pt_devctrl2_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_devctrl2_reg_write, + .u.w.restore = pt_word_reg_restore, }, /* Link Control 2 reg */ { @@ -494,6 +535,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = { .init = pt_linkctrl2_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_linkctrl2_reg_write, + .u.w.restore = pt_word_reg_restore, }, { .size = 0, @@ -512,6 +554,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = { .init = pt_ptr_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, /* Message Control reg */ { @@ -523,6 +566,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = { .init = 
pt_msgctrl_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_msgctrl_reg_write, + .u.w.restore = NULL, }, /* Message Address reg */ { @@ -534,6 +578,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = { .init = pt_msgaddr32_reg_init, .u.dw.read = pt_long_reg_read, .u.dw.write = pt_msgaddr32_reg_write, + .u.dw.restore = NULL, }, /* Message Upper Address reg (if PCI_MSI_FLAGS_64BIT set) */ { @@ -545,6 +590,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = { .init = pt_msgaddr64_reg_init, .u.dw.read = pt_long_reg_read, .u.dw.write = pt_msgaddr64_reg_write, + .u.dw.restore = NULL, }, /* Message Data reg (16 bits of data for 32-bit devices) */ { @@ -556,6 +602,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = { .init = pt_msgdata_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_msgdata_reg_write, + .u.w.restore = NULL, }, /* Message Data reg (16 bits of data for 64-bit devices) */ { @@ -567,6 +614,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = { .init = pt_msgdata_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_msgdata_reg_write, + .u.w.restore = NULL, }, { .size = 0, @@ -585,6 +633,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msix_tbl[] = { .init = pt_ptr_reg_init, .u.b.read = pt_byte_reg_read, .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, }, /* Message Control reg */ { @@ -596,6 +645,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msix_tbl[] = { .init = pt_msixctrl_reg_init, .u.w.read = pt_word_reg_read, .u.w.write = pt_msixctrl_reg_write, + .u.w.restore = NULL, }, { .size = 0, @@ -624,7 +674,7 @@ static const struct pt_reg_grp_info_tbl pt_emu_reg_grp_tbl[] = { .grp_id = PCI_CAP_ID_PM, .grp_type = GRP_TYPE_EMU, .grp_size = PCI_PM_SIZEOF, - .size_init = pt_reg_grp_size_init, + .size_init = pt_pm_size_init, .emu_reg_tbl= pt_emu_reg_pm_tbl, }, /* AGP Capability Structure reg group */ @@ -777,23 +827,6 @@ static int get_next_keyval(char **option, char **key, char **val) return 0; } -static void 
msi_set_enable(struct pt_dev *ptdev, int en) -{ - uint16_t val; - uint32_t address; - if (!ptdev->msi) - return; - - address = ptdev->msi->ctrl_offset; - if (!address) - return; - - val = pci_read_word(ptdev->pci_dev, address); - val &= ~PCI_MSI_FLAGS_ENABLE; - val |= en & PCI_MSI_FLAGS_ENABLE; - pci_write_word(ptdev->pci_dev, address, val); -} - /* Insert a new pass-through device into a specific pci slot. * input dom:bus:dev.func@slot, chose free one if slot == 0 * return -1: required slot not available @@ -1084,6 +1117,7 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t address, uint32_t val, { struct pt_dev *assigned_device = (struct pt_dev *)d; struct pci_dev *pci_dev = assigned_device->pci_dev; + struct pt_pm_info *pm_state = assigned_device->pm_state; struct pt_reg_grp_tbl *reg_grp_entry = NULL; struct pt_reg_grp_info_tbl *reg_grp = NULL; struct pt_reg_tbl *reg_entry = NULL; @@ -1144,6 +1178,13 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t address, uint32_t val, (d->devfn & 0x7), address, len); } + /* check power state transition flags */ + if (pm_state->flags & PT_FLAG_TRANSITING) + /* can't accept until previous power state transition is completed. + * so finish previous request here. 
+ */ + qemu_run_one_timer(pm_state->pm_timer); + /* find register group entry */ reg_grp_entry = pt_find_reg_grp(assigned_device, address); if (reg_grp_entry) @@ -1274,6 +1315,11 @@ out: break; } + if (pm_state->flags & PT_FLAG_TRANSITING) + /* set QEMUTimer */ + qemu_mod_timer(pm_state->pm_timer, + (qemu_get_clock(rt_clock) + pm_state->pm_delay)); + exit: return; } @@ -1282,6 +1328,7 @@ static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len) { struct pt_dev *assigned_device = (struct pt_dev *)d; struct pci_dev *pci_dev = assigned_device->pci_dev; + struct pt_pm_info *pm_state = assigned_device->pm_state; uint32_t val = 0xFFFFFFFF; struct pt_reg_grp_tbl *reg_grp_entry = NULL; struct pt_reg_grp_info_tbl *reg_grp = NULL; @@ -1324,6 +1371,13 @@ static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len) goto exit; } + /* check power state transition flags */ + if (pm_state->flags & PT_FLAG_TRANSITING) + /* can't accept until previous power state transition is completed. + * so finish previous request here. + */ + qemu_run_one_timer(pm_state->pm_timer); + /* find register group entry */ reg_grp_entry = pt_find_reg_grp(assigned_device, address); if (reg_grp_entry) @@ -1643,6 +1697,35 @@ uint8_t find_cap_offset(struct pci_dev *pci_dev, uint8_t cap) return 0; } +uint32_t find_ext_cap_offset(struct pci_dev *pci_dev, uint32_t cap) +{ + uint32_t header = 0; + int max_cap = 480; + int pos = 0x100; + + do + { + header = pci_read_long(pci_dev, pos); + /* + * If we have no capabilities, this is indicated by cap ID, + * cap version and next pointer all being 0. 
+ */ + if (header == 0) + break; + + if (PCI_EXT_CAP_ID(header) == cap) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < 0x100) + break; + + max_cap--; + }while (max_cap > 0); + + return 0; +} + /* parse BAR */ static int pt_bar_reg_parse( struct pt_dev *ptdev, struct pt_reg_info_tbl *reg) @@ -1751,6 +1834,298 @@ static void pt_bar_mapping(struct pt_dev *ptdev, int io_enable, int mem_enable) return; } +/* check power state transition */ +int check_power_state(struct pt_dev *ptdev) +{ + struct pt_pm_info *pm_state = ptdev->pm_state; + PCIDevice *d = &ptdev->dev; + uint16_t read_val = 0; + uint16_t cur_state = 0; + + /* get current power state */ + read_val = pci_read_word(ptdev->pci_dev, + (pm_state->pm_base + PCI_PM_CTRL)); + cur_state = read_val & PCI_PM_CTRL_STATE_MASK; + + if (pm_state->req_state != cur_state) + { + PT_LOG("Error: Failed to change power state. " + "[%02x:%02x.%x][requested state:%d][current state:%d]\n", + pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), + pm_state->req_state, cur_state); + return -1; + } + return 0; +} + +/* save AER register */ +static void pt_aer_reg_save(struct pt_dev *ptdev) +{ + PCIDevice *d = &ptdev->dev; + uint32_t aer_base = ptdev->pm_state->aer_base; + int i = 0; + /* Root Port and Root Complex Event Collector need size expansion */ + int aer_size = 0x2c; + + for (i=0; i < aer_size; i+=4) + { + switch (i) { + /* after reset, following register values should be restored. + * So, save them. 
+ */ + case PCI_ERR_UNCOR_MASK: + case PCI_ERR_UNCOR_SEVER: + case PCI_ERR_COR_MASK: + case PCI_ERR_CAP: + *(uint32_t*)(d->config + (aer_base + i)) + = pci_read_long(ptdev->pci_dev, (aer_base + i)); + break; + default: + break; + } + } +} + +/* restore AER register */ +static void pt_aer_reg_restore(struct pt_dev *ptdev) +{ + PCIDevice *d = &ptdev->dev; + uint32_t aer_base = ptdev->pm_state->aer_base; + int i = 0; + uint32_t config = 0; + /* Root Port and Root Complex Event Collector need size expansion */ + int aer_size = 0x2c; + + for (i=0; i < aer_size; i+=4) + { + switch (i) { + /* the following registers should be reconfigured to correct values + * after reset. restore them. + */ + case PCI_ERR_UNCOR_MASK: + case PCI_ERR_UNCOR_SEVER: + case PCI_ERR_COR_MASK: + case PCI_ERR_CAP: + config = *(uint32_t*)(d->config + (aer_base + i)); + pci_write_long(ptdev->pci_dev, (aer_base + i), config); + break; + /* other registers should not be reconfigured after reset + * if there is no reason + */ + default: + break; + } + } +} + +/* reset Interrupt and I/O resource */ +void pt_reset_interrupt_and_io_mapping(struct pt_dev *ptdev) +{ + PCIDevice *d = &ptdev->dev; + PCIIORegion *r; + int i = 0; + + /* disable MSI/MSI-X and MSI-INTx translation */ + if (ptdev->msi) + pt_msi_disable(ptdev); + if (ptdev->msix) + pt_msix_disable(ptdev); + + /* clear all virtual region address */ + for (i=0; i<PCI_NUM_REGIONS; i++) + { + r = &d->io_regions[i]; + r->addr = -1; + } + + /* unmapping BAR */ + pt_bar_mapping(ptdev, 0, 0); +} + +/* restore a part of I/O device register */ +static void pt_config_restore(struct pt_dev *ptdev) +{ + struct pt_reg_grp_tbl *reg_grp_entry = NULL; + struct pt_reg_grp_info_tbl *reg_grp = NULL; + struct pt_reg_tbl *reg_entry = NULL; + struct pt_reg_info_tbl *reg = NULL; + uint32_t real_offset = 0; + uint32_t read_val = 0; + uint32_t val = 0; + int ret = 0; + PCIDevice *d = &ptdev->dev; + + /* find emulate register group entry */ + for (reg_grp_entry = 
ptdev->reg_grp_tbl_head.lh_first; reg_grp_entry; + reg_grp_entry = reg_grp_entry->entries.le_next) + { + /* find emulate register entry */ + for (reg_entry = reg_grp_entry->reg_tbl_head.lh_first; reg_entry; + reg_entry = reg_entry->entries.le_next) + { + reg = reg_entry->reg; + + /* check whether restoring is needed */ + if (!reg->u.b.restore) + continue; + + real_offset = (reg_grp_entry->base_offset + reg->offset); + + /* read I/O device register value */ + switch (reg->size) { + case 1: + read_val = pci_read_byte(ptdev->pci_dev, real_offset); + break; + case 2: + read_val = pci_read_word(ptdev->pci_dev, real_offset); + break; + case 4: + read_val = pci_read_long(ptdev->pci_dev, real_offset); + break; + } + + val = 0; + + /* restore based on register size */ + switch (reg->size) { + case 1: + /* byte register */ + ret = reg->u.b.restore(ptdev, reg_entry, real_offset, + (uint8_t)read_val, (uint8_t *)&val); + break; + case 2: + /* word register */ + ret = reg->u.w.restore(ptdev, reg_entry, real_offset, + (uint16_t)read_val, (uint16_t *)&val); + break; + case 4: + /* double word register */ + ret = reg->u.dw.restore(ptdev, reg_entry, real_offset, + (uint32_t)read_val, (uint32_t *)&val); + break; + } + + /* restoring error */ + if (ret < 0) + { + /* exit I/O emulator */ + PT_LOG("Internal error: Invalid restoring " + "return value[%d]. 
I/O emulator exit.\n", ret); + exit(1); + } + +#ifdef PT_DEBUG_PCI_CONFIG_ACCESS + PT_LOG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n", + pci_bus_num(d->bus), (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), + real_offset, val, reg->size); +#endif + + switch (reg->size) { + case 1: + pci_write_byte(ptdev->pci_dev, real_offset, val); + break; + case 2: + pci_write_word(ptdev->pci_dev, real_offset, val); + break; + case 4: + pci_write_long(ptdev->pci_dev, real_offset, val); + break; + } + } + } + + /* if AER supported, restore it */ + if (ptdev->pm_state->aer_base) + pt_aer_reg_restore(ptdev); +} + +/* reinitialize all emulate registers */ +static void pt_config_reinit(struct pt_dev *ptdev) +{ + struct pt_reg_grp_tbl *reg_grp_entry = NULL; + struct pt_reg_grp_info_tbl *reg_grp = NULL; + struct pt_reg_tbl *reg_entry = NULL; + struct pt_reg_info_tbl *reg = NULL; + + /* find emulate register group entry */ + for (reg_grp_entry = ptdev->reg_grp_tbl_head.lh_first; reg_grp_entry; + reg_grp_entry = reg_grp_entry->entries.le_next) + { + /* find emulate register entry */ + for (reg_entry = reg_grp_entry->reg_tbl_head.lh_first; reg_entry; + reg_entry = reg_entry->entries.le_next) + { + reg = reg_entry->reg; + if (reg->init) + /* initialize emulate register */ + reg_entry->data = reg->init(ptdev, reg_entry->reg, + (reg_grp_entry->base_offset + reg->offset)); + } + } +} + +void pt_from_d3hot_to_d0_with_reset(void *opaque) +{ + struct pt_dev *ptdev = opaque; + PCIDevice *d = &ptdev->dev; + struct pt_pm_info *pm_state = ptdev->pm_state; + uint8_t e_device = 0; + uint8_t e_intx = 0; + int ret = 0; + + /* check power state */ + ret = check_power_state(ptdev); + + if (ret < 0) + goto out; + + PT_LOG("Reinitialize PCI configuration registers " + "due to power state transition with internal reset. 
[%02x:%02x.%x]\n", + pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7)); + + /* restore a part of I/O device register */ + pt_config_restore(ptdev); + + /* reinitialize all emulate register */ + pt_config_reinit(ptdev); + + /* setup MSI-INTx translation if support */ + ret = pt_enable_msi_translate(ptdev); + + /* rebind machine_irq to device */ + if (ret < 0 && ptdev->machine_irq != 0) + { + e_device = (ptdev->dev.devfn >> 3) & 0x1f; + /* fix virtual interrupt pin to INTA# */ + e_intx = 0; + + ret = xc_domain_bind_pt_pci_irq(xc_handle, domid, ptdev->machine_irq, + 0, e_device, e_intx); + if (ret < 0) + PT_LOG("Error: Rebinding of interrupt failed! ret=%d\n", ret); + } + +out: + /* power state transition flags off */ + pm_state->flags &= ~PT_FLAG_TRANSITING; + + qemu_free_timer(pm_state->pm_timer); +} + +void pt_default_power_transition(void *opaque) +{ + struct pt_dev *ptdev = opaque; + struct pt_pm_info *pm_state = ptdev->pm_state; + + /* check power state */ + check_power_state(ptdev); + + /* power state transition flags off */ + pm_state->flags &= ~PT_FLAG_TRANSITING; + + qemu_free_timer(pm_state->pm_timer); +} + /* initialize emulate register */ static int pt_config_reg_init(struct pt_dev *ptdev, struct pt_reg_grp_tbl *reg_grp, @@ -1878,6 +2253,15 @@ static void pt_config_delete(struct pt_dev *ptdev) if (ptdev->msi) free(ptdev->msi); + /* free Power Management info table */ + if (ptdev->pm_state) + { + if (ptdev->pm_state->pm_timer) + qemu_free_timer(ptdev->pm_state->pm_timer); + + free(ptdev->pm_state); + } + /* free all register group entry */ while ((reg_grp_entry = ptdev->reg_grp_tbl_head.lh_first) != NULL) { @@ -2027,6 +2411,36 @@ static uint32_t pt_bar_reg_init(struct pt_dev *ptdev, return reg_field; } +/* initialize Power Management Capabilities register */ +static uint32_t pt_pmc_reg_init(struct pt_dev *ptdev, + struct pt_reg_info_tbl *reg, uint32_t real_offset) +{ + PCIDevice *d = &ptdev->dev; + + /* set Power Management Capabilities 
register */ + ptdev->pm_state->pmc_field = *(uint16_t *)(d->config + real_offset); + + return reg->init_val; +} + +/* initialize PCI Power Management Control/Status register */ +static uint32_t pt_pmcsr_reg_init(struct pt_dev *ptdev, + struct pt_reg_info_tbl *reg, uint32_t real_offset) +{ + PCIDevice *d = &ptdev->dev; + uint16_t cap_ver = 0; + + /* check PCI Power Management support version */ + cap_ver = ptdev->pm_state->pmc_field & PCI_PM_CAP_VER_MASK; + + if (cap_ver > 2) + /* set No Soft Reset */ + ptdev->pm_state->no_soft_reset = (*(uint8_t *)(d->config + real_offset) + & (uint8_t)PCI_PM_CTRL_NO_SOFT_RESET); + + return reg->init_val; +} + /* initialize Link Control register */ static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev, struct pt_reg_info_tbl *reg, uint32_t real_offset) @@ -2108,11 +2522,6 @@ static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev, /* All register is 0 after reset, except first 4 byte */ reg_field &= reg->ro_mask; - if (ptdev->msi_trans_cap) { - PT_LOG("Turning on MSI-INTx translation\n"); - ptdev->msi_trans_en = 1; - } - return reg_field; } @@ -2180,7 +2589,9 @@ static uint32_t pt_msixctrl_reg_init(struct pt_dev *ptdev, pci_write_word(pdev, real_offset, reg_field & ~PCI_MSIX_ENABLE); reg_field &= ~(PCI_MSIX_ENABLE | PCI_MSIX_MASK); } - + + ptdev->msix->ctrl_offset = real_offset; + return reg_field; } @@ -2191,6 +2602,32 @@ static uint8_t pt_reg_grp_size_init(struct pt_dev *ptdev, return grp_reg->grp_size; } +/* get Power Management Capability Structure register group size */ +static uint8_t pt_pm_size_init(struct pt_dev *ptdev, + struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset) +{ + ptdev->pm_state = qemu_mallocz(sizeof(struct pt_pm_info)); + if (!ptdev->pm_state) + { + /* exit I/O emulator */ + PT_LOG("Error: Allocating pt_pm_info failed. 
I/O emulator exit.\n"); + exit(1); + } + + /* set Power Management Capability base offset */ + ptdev->pm_state->pm_base = base_offset; + + /* find AER register and set AER Capability base offset */ + ptdev->pm_state->aer_base = find_ext_cap_offset(ptdev->pci_dev, + (uint32_t)PCI_EXT_CAP_ID_AER); + + /* save AER register */ + if (ptdev->pm_state->aer_base) + pt_aer_reg_save(ptdev); + + return grp_reg->grp_size; +} + /* get MSI Capability Structure register group size */ static uint8_t pt_msi_size_init(struct pt_dev *ptdev, struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset) @@ -2215,7 +2652,8 @@ static uint8_t pt_msi_size_init(struct pt_dev *ptdev, exit(1); } memset(ptdev->msi, 0, sizeof(struct pt_msi_info)); - + ptdev->msi->pirq = -1; + return msi_size; } @@ -2705,18 +3143,17 @@ static int pt_pmcsr_reg_write(struct pt_dev *ptdev, uint16_t *value, uint16_t dev_value, uint16_t valid_mask) { struct pt_reg_info_tbl *reg = cfg_entry->reg; + PCIDevice *d = &ptdev->dev; uint16_t writable_mask = 0; uint16_t throughable_mask = 0; uint16_t pmcsr_mask = (PCI_PM_CTRL_PME_ENABLE | PCI_PM_CTRL_DATA_SEL_MASK | PCI_PM_CTRL_PME_STATUS); + struct pt_pm_info *pm_state = ptdev->pm_state; + uint16_t read_val = 0; /* modify emulate register */ writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask & ~pmcsr_mask; - /* ignore it when the requested state neither D3 nor D0 */ - if (((*value & PCI_PM_CTRL_STATE_MASK) != PCI_PM_CTRL_STATE_MASK) && - ((*value & PCI_PM_CTRL_STATE_MASK) != 0)) - writable_mask &= ~PCI_PM_CTRL_STATE_MASK; cfg_entry->data = ((*value & writable_mask) | (cfg_entry->data & ~writable_mask)); @@ -2726,6 +3163,100 @@ static int pt_pmcsr_reg_write(struct pt_dev *ptdev, *value = ((*value & throughable_mask) | (dev_value & ~throughable_mask)); + /* set I/O device power state */ + pm_state->cur_state = (dev_value & PCI_PM_CTRL_STATE_MASK); + + /* set Guest requested PowerState */ + pm_state->req_state = (*value & PCI_PM_CTRL_STATE_MASK); + + /* check power state 
transition or not */ + if (pm_state->cur_state == pm_state->req_state) + /* not power state transition */ + return 0; + + /* check enable power state transition */ + if ((pm_state->req_state != 0) && + (pm_state->cur_state > pm_state->req_state)) + { + PT_LOG("Error: Invalid power transition. " + "[%02x:%02x.%x][requested state:%d][current state:%d]\n", + pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), + pm_state->req_state, pm_state->cur_state); + + return 0; + } + + /* check if this device supports the requested power state */ + if (((pm_state->req_state == 1) && !(pm_state->pmc_field & PCI_PM_CAP_D1)) + || ((pm_state->req_state == 2) && + !(pm_state->pmc_field & PCI_PM_CAP_D2))) + { + PT_LOG("Error: Invalid power transition. " + "[%02x:%02x.%x][requested state:%d][current state:%d]\n", + pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), + pm_state->req_state, pm_state->cur_state); + + return 0; + } + + /* in case of transition related to D3hot, it's necessary to wait 10 ms. + * But because writing to register will be performed later on actually, + * don't start QEMUTimer right now, just alloc and init QEMUTimer here. 
+ */ + if ((pm_state->cur_state == 3) || (pm_state->req_state == 3)) + { + if (pm_state->req_state == 0) + { + /* alloc and init QEMUTimer */ + if (!pm_state->no_soft_reset) + { + pm_state->pm_timer = qemu_new_timer(rt_clock, + pt_from_d3hot_to_d0_with_reset, ptdev); + + /* reset Interrupt and I/O resource mapping */ + pt_reset_interrupt_and_io_mapping(ptdev); + } + else + pm_state->pm_timer = qemu_new_timer(rt_clock, + pt_default_power_transition, ptdev); + } + else + /* alloc and init QEMUTimer */ + pm_state->pm_timer = qemu_new_timer(rt_clock, + pt_default_power_transition, ptdev); + + /* set power state transition delay */ + pm_state->pm_delay = 10; + + /* power state transition flags on */ + pm_state->flags |= PT_FLAG_TRANSITING; + } + /* in case of transition related to D0, D1 and D2, + * no need to use QEMUTimer. + * So, we perform writing to register here and then read it back. + */ + else + { + /* write power state to I/O device register */ + pci_write_word(ptdev->pci_dev, + (pm_state->pm_base + PCI_PM_CTRL), *value); + + /* in case of transition related to D2, + * it's necessary to wait 200 usec. + * But because QEMUTimer does not support microsecond units right now, + * we wait ourselves here. 
+ */ + if ((pm_state->cur_state == 2) || (pm_state->req_state == 2)) + usleep(200); + + /* check power state */ + check_power_state(ptdev); + + /* recreate value for writing to I/O device register */ + *value = pci_read_word(ptdev->pci_dev, + (pm_state->pm_base + PCI_PM_CTRL)); + } + return 0; } @@ -2760,8 +3291,7 @@ static int pt_linkctrl_reg_write(struct pt_dev *ptdev, struct pt_reg_info_tbl *reg = cfg_entry->reg; uint16_t writable_mask = 0; uint16_t throughable_mask = 0; - uint16_t linkctrl_mask = (PCI_EXP_LNKCTL_ASPM | 0x04 | - PCI_EXP_LNKCTL_DISABLE | + uint16_t linkctrl_mask = (0x04 | PCI_EXP_LNKCTL_DISABLE | PCI_EXP_LNKCTL_RETRAIN | 0x0400 | 0x0800 | 0xF000); @@ -2825,34 +3355,6 @@ static int pt_linkctrl2_reg_write(struct pt_dev *ptdev, return 0; } -static void pt_unmap_msi_translate(struct pt_dev *ptdev) -{ - uint16_t e_device, e_intx; - int rc; - - /* MSI_ENABLE bit should be disabed until the new handler is set */ - msi_set_enable(ptdev, 0); - - e_device = (ptdev->dev.devfn >> 3) & 0x1f; - /* fix virtual interrupt pin to INTA# */ - e_intx = 0; - rc = xc_domain_unbind_pt_irq(xc_handle, domid, ptdev->msi->pirq, - PT_IRQ_TYPE_MSI_TRANSLATE, 0, - e_device, e_intx, 0); - if (rc < 0) - PT_LOG("Error: Unbinding pt irq for MSI-INTx failed! rc=%d\n", rc); - - if (ptdev->machine_irq) - { - rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, ptdev->machine_irq, - 0, e_device, e_intx); - if ( rc < 0 ) - PT_LOG("Error: Rebinding of interrupt failed! 
rc=%d\n", rc); - } - - ptdev->msi_trans_en = 0; -} - /* write Message Control register */ static int pt_msgctrl_reg_write(struct pt_dev *ptdev, struct pt_reg_tbl *cfg_entry, @@ -2893,7 +3395,7 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev, { if (ptdev->msi_trans_en) { PT_LOG("guest enabling MSI, disable MSI-INTx translation\n"); - pt_unmap_msi_translate(ptdev); + pt_disable_msi_translate(ptdev); } else { @@ -3075,7 +3577,7 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev, { if (ptdev->msi_trans_en) { PT_LOG("guest enabling MSI-X, disable MSI-INTx translation\n"); - pt_unmap_msi_translate(ptdev); + pt_disable_msi_translate(ptdev); } pt_msix_update(ptdev); } @@ -3085,6 +3587,141 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev, return 0; } +/* restore byte size emulate register */ +static int pt_byte_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint8_t dev_value, uint8_t *value) +{ + struct pt_reg_info_tbl *reg = cfg_entry->reg; + PCIDevice *d = &ptdev->dev; + + /* use I/O device register's value as restore value */ + *value = *(uint8_t *)(d->config + real_offset); + + /* create value for restoring to I/O device register */ + *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask); + + return 0; +} + +/* restore word size emulate register */ +static int pt_word_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint16_t dev_value, uint16_t *value) +{ + struct pt_reg_info_tbl *reg = cfg_entry->reg; + PCIDevice *d = &ptdev->dev; + + /* use I/O device register's value as restore value */ + *value = *(uint16_t *)(d->config + real_offset); + + /* create value for restoring to I/O device register */ + *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask); + + return 0; +} + +/* restore long size emulate register */ +static int pt_long_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint32_t dev_value, uint32_t *value) 
+{ + struct pt_reg_info_tbl *reg = cfg_entry->reg; + PCIDevice *d = &ptdev->dev; + + /* use I/O device register's value as restore value */ + *value = *(uint32_t *)(d->config + real_offset); + + /* create value for restoring to I/O device register */ + *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask); + + return 0; +} + +/* restore Command register: like the word restore, but PCI_COMMAND_FAST_BACK is dropped from the mask so that bit always comes from dev_value; always returns 0 */ +static int pt_cmd_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint16_t dev_value, uint16_t *value) +{ + struct pt_reg_info_tbl *reg = cfg_entry->reg; + PCIDevice *d = &ptdev->dev; + uint16_t restorable_mask = 0; + + /* use I/O device register's value as restore value */ + *value = *(uint16_t *)(d->config + real_offset); + + /* create value for restoring to I/O device register + * but do not include Fast Back-to-Back Enable bit. + */ + restorable_mask = reg->emu_mask & ~PCI_COMMAND_FAST_BACK; + *value = PT_MERGE_VALUE(*value, dev_value, restorable_mask); + + return 0; +} + +/* restore BAR: starts from pci_dev->base_addr[] (the upper 32 bits of the previous entry when bar_flag is PT_BAR_FLAG_UPPER), keeps the bits selected by the emulate mask chosen from bar_flag and takes the remaining bits from dev_value; calls exit(1) if reg->offset does not map to a BAR index, otherwise returns 0 */ +static int pt_bar_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint32_t dev_value, uint32_t *value) +{ + struct pt_reg_info_tbl *reg = cfg_entry->reg; + uint32_t bar_emu_mask = 0; + int index = 0; + + /* get BAR index */ + index = pt_bar_offset_to_index(reg->offset); + if (index < 0) + { + /* exit I/O emulator */ + PT_LOG("Internal error: Invalid BAR index[%d]. 
" + "I/O emulator exit.\n", index); + exit(1); + } + + /* use value from kernel sysfs; NOTE(review): the >> 32 below assumes base_addr[] elements are wider than 32 bits - confirm against the libpci headers in use */ + if (ptdev->bases[index].bar_flag == PT_BAR_FLAG_UPPER) + *value = ptdev->pci_dev->base_addr[index-1] >> 32; + else + *value = ptdev->pci_dev->base_addr[index]; + + /* set emulate mask depend on BAR flag */ + switch (ptdev->bases[index].bar_flag) + { + case PT_BAR_FLAG_MEM: + bar_emu_mask = PT_BAR_MEM_EMU_MASK; + break; + case PT_BAR_FLAG_IO: + bar_emu_mask = PT_BAR_IO_EMU_MASK; + break; + case PT_BAR_FLAG_UPPER: + bar_emu_mask = PT_BAR_ALLF; + break; + default: + /* any other flag leaves bar_emu_mask at 0, so *value becomes dev_value */ break; + } + + /* create value for restoring to I/O device register */ + *value = PT_MERGE_VALUE(*value, dev_value, bar_emu_mask); + + return 0; +} + +/* restore Power Management Control/Status register: *value is dev_value with PCI_PM_CTRL_PME_ENABLE cleared and PCI_PM_CTRL_PME_STATUS set; real_offset is not used and the local reg is assigned but never read; always returns 0 */ +static int pt_pmcsr_reg_restore(struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, uint16_t dev_value, uint16_t *value) +{ + struct pt_reg_info_tbl *reg = cfg_entry->reg; + + /* create value for restoring to I/O device register + * No need to restore, just clear PME Enable and PME Status bit + * Note: register type of PME Status bit is RW1C, so clear by writing 1b + */ + *value = (dev_value & ~PCI_PM_CTRL_PME_ENABLE) | PCI_PM_CTRL_PME_STATUS; + + return 0; +} + struct pt_dev * register_real_device(PCIBus *e_bus, const char *e_dev_name, int e_devfn, uint8_t r_bus, uint8_t r_dev, uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access, @@ -3197,32 +3834,6 @@ struct pt_dev * register_real_device(PCIBus *e_bus, if (!assigned_device->dev.config[0x3d]) goto out; - e_device = (assigned_device->dev.devfn >> 3) & 0x1f; - /* fix virtual interrupt pin to INTA# */ - e_intx = 0; - - while (assigned_device->msi_trans_en) - { - if (pt_msi_setup(assigned_device)) - { - PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n"); - assigned_device->msi_trans_en = 0; - break; - } - - rc = xc_domain_bind_pt_irq(xc_handle, domid, assigned_device->msi->pirq, - PT_IRQ_TYPE_MSI_TRANSLATE, 0, 
- e_device, e_intx, 0); - if ( rc < 0) - { - PT_LOG("Error: MSI-INTx translation bind failed, fallback\n"); - assigned_device->msi_trans_en = 0; - break; - } - msi_set_enable(assigned_device, 1); - break; - } - if ( PT_MACHINE_IRQ_AUTO == machine_irq ) { int pirq = pci_dev->irq; @@ -3242,12 +3853,16 @@ struct pt_dev * register_real_device(PCIBus *e_bus, } } - if (assigned_device->msi_trans_en) - goto out; + /* setup MSI-INTx translation if supported; a negative rc means it was not enabled, in which case machine_irq (when non-zero) is bound as a PCI INTx interrupt below */ + rc = pt_enable_msi_translate(assigned_device); /* bind machine_irq to device */ - if ( 0 != machine_irq ) + if (rc < 0 && machine_irq != 0) { + e_device = (assigned_device->dev.devfn >> 3) & 0x1f; + /* fix virtual interrupt pin to INTA# */ + e_intx = 0; + rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, machine_irq, 0, e_device, e_intx); if ( rc < 0 ) diff --git a/hw/pass-through.h b/hw/pass-through.h index 7a623be..4704d83 100644 --- a/hw/pass-through.h +++ b/hw/pass-through.h @@ -24,6 +24,7 @@ #include "pci/pci.h" #include "exec-all.h" #include "sys-queue.h" +#include "qemu-timer.h" /* Log acesss */ #define PT_LOGGING_ENABLED @@ -59,6 +60,12 @@ #define PCI_CAP_ID_SSVID 0x0D #endif +#ifdef PCI_PM_CTRL_NO_SOFT_RESET +#undef PCI_PM_CTRL_NO_SOFT_RESET +#endif +/* No Soft Reset for D3hot->D0; any earlier definition is removed by the #undef above, so this value always applies */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 + #ifndef PCI_MSI_FLAGS_MASK_BIT /* interrupt masking & reporting supported */ #define PCI_MSI_FLAGS_MASK_BIT 0x0100 @@ -79,6 +86,19 @@ #define PCI_EXP_TYPE_ROOT_EC 0xa #endif +#ifndef PCI_EXT_CAP_ID +/* Extended Capabilities (PCI-X 2.0 and PCI Express): the capability ID is the low 16 bits of the header. NOTE(review): header is not parenthesized in the expansion, so pass a simple expression */ +#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) +#endif + +#ifndef PCI_EXT_CAP_NEXT +/* Extended Capabilities (PCI-X 2.0 and PCI Express): the next capability offset is header bits 31:20 with the low two bits masked off (0xffc) */ +#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) +#endif + +/* power state transition */ +#define PT_FLAG_TRANSITING 0x0001 + #define PT_INVALID_REG 0xFFFFFFFF /* invalid register value */ #define PT_BAR_ALLF 0xFFFFFFFF /* BAR ALLF value */ #define PT_BAR_MEM_RO_MASK 0x0000000F /* 
BAR ReadOnly mask(Memory) */ @@ -102,6 +122,8 @@ enum { }\ } while(0) +/* PT_MERGE_VALUE: the result has the bits of value where val_mask is set and the bits of data where val_mask is clear */ #define PT_MERGE_VALUE(value, data, val_mask) \ + (((value) & (val_mask)) | ((data) & ~(val_mask))) struct pt_region { /* Virtual phys base & size */ @@ -135,6 +157,7 @@ struct msix_entry_info { }; struct pt_msix_info { + uint32_t ctrl_offset; /* offset that msix_set_enable() in pt-msi.c passes to pci_read_word and pci_write_word; 0 makes that helper return without touching the device */ int enabled; int total_entries; int bar_index; @@ -147,6 +170,18 @@ struct pt_msix_info { struct msix_entry_info msix_entry[0]; }; +/* power management state kept per passed-through device; struct pt_dev points to it through its pm_state member */ struct pt_pm_info { + QEMUTimer *pm_timer; /* QEMUTimer struct */ + int no_soft_reset; /* No Soft Reset flags */ + uint16_t flags; /* power state transition flags */ + uint16_t pmc_field; /* Power Management Capabilities field */ + int pm_delay; /* power state transition delay */ + uint16_t cur_state; /* current power state */ + uint16_t req_state; /* requested power state */ + uint32_t pm_base; /* Power Management Capability reg base offset */ + uint32_t aer_base; /* AER Capability reg base offset */ +}; + /* This structure holds the context of the mapping functions and data that is relevant for qemu device management. 
@@ -163,6 +198,7 @@ struct pt_dev { /* Physical MSI to guest INTx translation when possible */ int msi_trans_cap; int msi_trans_en; + struct pt_pm_info *pm_state; /* PM virtualization state (struct pt_pm_info) */ }; /* Used for formatting PCI BDF into cf8 format */ @@ -260,6 +296,24 @@ typedef int (*conf_byte_read) (struct pt_dev *ptdev, struct pt_reg_tbl *cfg_entry, uint8_t *value, uint8_t valid_mask); +/* emul reg long restore method: given the register entry, its real_offset and the 32-bit dev_value, the callback stores the value to restore through *value (pt_long_reg_restore and pt_bar_reg_restore in pass-through.c match this signature) */ +typedef int (*conf_dword_restore) (struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, + uint32_t dev_value, + uint32_t *value); +/* emul reg word restore method: 16-bit counterpart (pt_word_reg_restore, pt_cmd_reg_restore and pt_pmcsr_reg_restore in pass-through.c match this signature) */ +typedef int (*conf_word_restore) (struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, + uint16_t dev_value, + uint16_t *value); +/* emul reg byte restore method: 8-bit counterpart (pt_byte_reg_restore in pass-through.c matches this signature) */ +typedef int (*conf_byte_restore) (struct pt_dev *ptdev, + struct pt_reg_tbl *cfg_entry, + uint32_t real_offset, + uint8_t dev_value, + uint8_t *value); /* emul reg infomation table */ struct pt_reg_info_tbl { @@ -281,18 +335,24 @@ struct pt_reg_info_tbl { conf_dword_write write; /* emul reg long read method */ conf_dword_read read; + /* emul reg long restore method */ + conf_dword_restore restore; } dw; struct { /* emul reg word write method */ conf_word_write write; /* emul reg word read method */ conf_word_read read; + /* emul reg word restore method */ + conf_word_restore restore; } w; struct { /* emul reg byte write method */ conf_byte_write write; /* emul reg byte read method */ conf_byte_read read; + /* emul reg byte restore method */ + conf_byte_restore restore; } b; } u; }; diff --git a/hw/pci.h b/hw/pci.h index a527a39..2800499 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -44,7 +44,7 @@ typedef struct PCIIORegion { struct PCIDevice { /* PCI config space */ - uint8_t config[256]; + uint8_t config[4096]; /* NOTE(review): presumably enlarged so the extended capability offsets used with PCI_EXT_CAP_ID and PCI_EXT_CAP_NEXT fit in the array - confirm */ /* the following fields are read only */ PCIBus *bus; diff --git a/hw/pt-msi.c b/hw/pt-msi.c index 9898763..c7a8f22 100644 --- a/hw/pt-msi.c +++ b/hw/pt-msi.c @@ -22,6 +22,41 
@@ #include "pt-msi.h" #include <sys/mman.h> +/* clear PCI_MSI_FLAGS_ENABLE in the MSI control word at dev->msi->ctrl_offset, then OR in (en & PCI_MSI_FLAGS_ENABLE) and write the word back; returns without touching the device when dev->msi is NULL or the offset is 0. NOTE(review): unlike msix_set_enable(), en is masked rather than tested for non-zero, so callers are expected to pass 0 or 1 - confirm */ static void msi_set_enable(struct pt_dev *dev, int en) +{ + uint16_t val = 0; + uint32_t address = 0; + if (!dev->msi) + return; + + address = dev->msi->ctrl_offset; + if (!address) + return; + + val = pci_read_word(dev->pci_dev, address); + val &= ~PCI_MSI_FLAGS_ENABLE; + val |= en & PCI_MSI_FLAGS_ENABLE; + pci_write_word(dev->pci_dev, address, val); +} + +/* clear PCI_MSIX_ENABLE in the MSI-X control word at dev->msix->ctrl_offset and set it again when en is non-zero; returns without touching the device when dev->msix is NULL or the offset is 0 */ static void msix_set_enable(struct pt_dev *dev, int en) +{ + uint16_t val = 0; + uint32_t address = 0; + if (!dev->msix) + return; + + address = dev->msix->ctrl_offset; + if (!address) + return; + + val = pci_read_word(dev->pci_dev, address); + val &= ~PCI_MSIX_ENABLE; + if (en) + val |= PCI_MSIX_ENABLE; + pci_write_word(dev->pci_dev, address, val); +} + /* MSI virtuailization functions */ /* @@ -95,6 +130,141 @@ int pt_msi_update(struct pt_dev *d) d->msi->pirq, gflags); } +void pt_msi_disable(struct pt_dev *dev) +{ + PCIDevice *d = &dev->dev; + uint8_t gvec = 0; + uint32_t gflags = 0; + uint64_t addr = 0; + uint8_t e_device = 0; + uint8_t e_intx = 0; + + msi_set_enable(dev, 0); + + e_device = (dev->dev.devfn >> 3) & 0x1f; + /* fix virtual interrupt pin to INTA# */ + e_intx = 0; + + /* teardown order: unbind the MSI-INTx translation (or, when MSI_FLAG_UNINIT is not set, the guest MSI binding), unmap the pirq, then unbind INTx; a failed unbind or unmap before the INTx step jumps to out, which still resets the msi bookkeeping. NOTE(review): dev->msi is dereferenced from here on without the NULL check that msi_set_enable() performs - confirm callers guarantee it */ if (dev->msi_trans_en) + { + if (xc_domain_unbind_pt_irq(xc_handle, domid, dev->msi->pirq, + PT_IRQ_TYPE_MSI_TRANSLATE, 0, + e_device, e_intx, 0)) + { + PT_LOG("Error: Unbinding pt irq for MSI-INTx failed!\n"); + goto out; + } + } + else if (!(dev->msi->flags & MSI_FLAG_UNINIT)) + { + /* get vector, address, flags info, etc. */ + gvec = dev->msi->data & 0xFF; + addr = (uint64_t)dev->msi->addr_hi << 32 | dev->msi->addr_lo; + gflags = __get_msi_gflags(dev->msi->data, addr); + + PT_LOG("Unbind msi with pirq %x, gvec %x\n", + dev->msi->pirq, gvec); + + if (xc_domain_unbind_msi_irq(xc_handle, domid, gvec, + dev->msi->pirq, gflags)) + { + PT_LOG("Error: Unbinding of MSI failed. 
[%02x:%02x.%x]\n", + pci_bus_num(d->bus), + ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7)); + goto out; + } + } + + if (dev->msi->pirq != -1) + { + PT_LOG("Unmap msi with pirq %x\n", dev->msi->pirq); + + if (xc_physdev_unmap_pirq(xc_handle, domid, dev->msi->pirq)) + { + PT_LOG("Error: Unmapping of MSI failed. [%02x:%02x.%x]\n", + pci_bus_num(d->bus), + ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7)); + goto out; + } + } + /* unbind INTx */ + if (dev->msi_trans_cap && !dev->msi_trans_en) + { + if (xc_domain_unbind_pt_irq(xc_handle, domid, dev->machine_irq, + PT_IRQ_TYPE_PCI, 0, e_device, e_intx, 0)) + PT_LOG("Error: Unbinding of interrupt failed!\n"); + } + +out: + /* clear msi info (also reached via goto out after a failed unbind or unmap) */ + dev->msi->flags = 0; + dev->msi->pirq = -1; + dev->msi_trans_en = 0; +} + +/* MSI-INTx translation virtualization functions */ +/* enable MSI-INTx translation: returns -1 when dev->msi is NULL, msi_trans_cap is 0, pt_msi_setup() fails or the PT_IRQ_TYPE_MSI_TRANSLATE bind fails; otherwise enables MSI on the device, sets msi_trans_en to 1 and returns 0. Once the capability check has passed, MSI is disabled and msi_trans_en cleared before the attempt, so both stay off on the later failure paths */ int pt_enable_msi_translate(struct pt_dev* dev) +{ + uint8_t e_device = 0; + uint8_t e_intx = 0; + + if (!(dev->msi && dev->msi_trans_cap)) + return -1; + + msi_set_enable(dev, 0); + dev->msi_trans_en = 0; + + if (pt_msi_setup(dev)) + { + PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n"); + return -1; + } + + e_device = (dev->dev.devfn >> 3) & 0x1f; + /* fix virtual interrupt pin to INTA# */ + e_intx = 0; + + if (xc_domain_bind_pt_irq(xc_handle, domid, dev->msi->pirq, + PT_IRQ_TYPE_MSI_TRANSLATE, 0, + e_device, e_intx, 0)) + { + PT_LOG("Error: MSI-INTx translation bind failed, fallback\n"); + return -1; + } + + msi_set_enable(dev, 1); + dev->msi_trans_en = 1; + + return 0; +} + +/* disable MSI-INTx translation: turns MSI off, unbinds the translated pirq, rebinds machine_irq as a PCI interrupt when it is non-zero and clears msi_trans_en; unbind and rebind failures are only logged */ void pt_disable_msi_translate(struct pt_dev *dev) +{ + uint8_t e_device = 0; + uint8_t e_intx = 0; + + /* MSI_ENABLE bit should be disabled until the new handler is set */ + msi_set_enable(dev, 0); + + e_device = (dev->dev.devfn >> 3) & 0x1f; + /* fix virtual interrupt pin to INTA# */ + e_intx = 0; + + if (xc_domain_unbind_pt_irq(xc_handle, domid, dev->msi->pirq, + PT_IRQ_TYPE_MSI_TRANSLATE, 0, + e_device, e_intx, 0)) + PT_LOG("Error: Unbinding pt irq for MSI-INTx 
failed!\n"); + + if (dev->machine_irq) + { + if (xc_domain_bind_pt_pci_irq(xc_handle, domid, dev->machine_irq, + 0, e_device, e_intx)) + PT_LOG("Error: Rebinding of interrupt failed!\n"); + } + + dev->msi_trans_en = 0; +} + /* MSI-X virtulization functions */ static void mask_physical_msix_entry(struct pt_dev *dev, int entry_nr, int mask) { @@ -159,6 +329,52 @@ int pt_msix_update(struct pt_dev *dev) return 0; } +/* disable MSI-X: clears the enable bit, then for each entry whose pirq is not -1 unbinds the MSI binding and, only if that succeeded, unmaps the pirq; the pirq and flags of every processed entry are reset whether or not those calls succeeded. NOTE(review): dev->msix is dereferenced without the NULL check that msix_set_enable() performs - confirm callers guarantee it */ void pt_msix_disable(struct pt_dev *dev) +{ + PCIDevice *d = &dev->dev; + uint8_t gvec = 0; + uint32_t gflags = 0; + uint64_t addr = 0; + int i = 0; + struct msix_entry_info *entry = NULL; + + msix_set_enable(dev, 0); + + for ( i = 0; i < dev->msix->total_entries; i++ ) + { + entry = &dev->msix->msix_entry[i]; + + if (entry->pirq == -1) + continue; + + gvec = entry->io_mem[2] & 0xff; + addr = *(uint64_t *)&entry->io_mem[0]; + gflags = __get_msi_gflags(entry->io_mem[2], addr); + + PT_LOG("Unbind msix with pirq %x, gvec %x\n", + entry->pirq, gvec); + + if (xc_domain_unbind_msi_irq(xc_handle, domid, gvec, + entry->pirq, gflags)) + PT_LOG("Error: Unbinding of MSI-X failed. [%02x:%02x.%x]\n", + pci_bus_num(d->bus), + ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7)); + else + { + PT_LOG("Unmap msix with pirq %x\n", entry->pirq); + + if (xc_physdev_unmap_pirq(xc_handle, + domid, entry->pirq)) + PT_LOG("Error: Unmapping of MSI-X failed. 
[%02x:%02x.%x]\n", + pci_bus_num(d->bus), + ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7)); + } + /* clear msi-x info */ + entry->pirq = -1; + entry->flags = 0; + } +} + static void pci_msix_invalid_write(void *opaque, target_phys_addr_t addr, uint32_t val) { diff --git a/hw/pt-msi.h b/hw/pt-msi.h index a8632d5..dea0848 100644 --- a/hw/pt-msi.h +++ b/hw/pt-msi.h @@ -85,9 +85,21 @@ __get_msi_gflags(uint32_t data, uint64_t addr); int pt_msi_update(struct pt_dev *d); +void +pt_msi_disable(struct pt_dev *dev); + +int +pt_enable_msi_translate(struct pt_dev* dev); + +void +pt_disable_msi_translate(struct pt_dev *dev); + int pt_msix_update(struct pt_dev *dev); +void +pt_msix_disable(struct pt_dev *dev); + int remove_msix_mapping(struct pt_dev *dev, int bar_index); diff --git a/qemu-timer.h b/qemu-timer.h index 7408edc..181428f 100644 --- a/qemu-timer.h +++ b/qemu-timer.h @@ -31,6 +31,8 @@ extern int64_t ticks_per_sec; void qemu_get_timer(QEMUFile *f, QEMUTimer *ts); void qemu_put_timer(QEMUFile *f, QEMUTimer *ts); +void qemu_run_one_timer(QEMUTimer *ts); + /* ptimer.c */ typedef struct ptimer_state ptimer_state; typedef void (*ptimer_cb)(void *opaque); diff --git a/vl.c b/vl.c index dd5d155..8539f6d 100644 --- a/vl.c +++ b/vl.c @@ -1286,6 +1286,22 @@ void qemu_get_timer(QEMUFile *f, QEMUTimer *ts) } } +/* run the specified timer: the caller blocks in usleep() until rt_clock reaches ts->expire_time, then ts->cb(ts->opaque) is invoked directly. NOTE(review): the * 1000 presumably converts rt_clock milliseconds to microseconds - confirm the clock unit */ +void qemu_run_one_timer(QEMUTimer *ts) +{ + uint64_t current_time; + + /* remove timer from the list before calling the callback */ + qemu_del_timer(ts); + + while ((current_time = qemu_get_clock(rt_clock)) < ts->expire_time) + /* sleep until the expire time */ + usleep((ts->expire_time - current_time) * 1000); + + /* run the callback */ + ts->cb(ts->opaque); +} + static void timer_save(QEMUFile *f, void *opaque) { if (cpu_ticks_enabled) { _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |