[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] x86 hvm: Support MSI-X for HVM domains.
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1210583246 -3600 # Node ID 4afc6023e8eca87590d7d6e89bebad45f299235c # Parent 5c00188dd159825e47547da59f691b2a3c59ac44 x86 hvm: Support MSI-X for HVM domains. Signed-off-by: Shan Haitao <Haitao.shan@xxxxxxxxx> --- tools/ioemu/hw/pass-through.c | 11 + tools/ioemu/hw/pass-through.h | 21 +- tools/ioemu/hw/pt-msi.c | 424 +++++++++++++++++++++++++++++++++++++++--- tools/ioemu/hw/pt-msi.h | 15 + tools/libxc/xc_physdev.c | 2 tools/libxc/xenctrl.h | 1 6 files changed, 445 insertions(+), 29 deletions(-) diff -r 5c00188dd159 -r 4afc6023e8ec tools/ioemu/hw/pass-through.c --- a/tools/ioemu/hw/pass-through.c Mon May 12 10:06:40 2008 +0100 +++ b/tools/ioemu/hw/pass-through.c Mon May 12 10:07:26 2008 +0100 @@ -205,6 +205,7 @@ void pt_iomem_map(PCIDevice *d, int i, u if ( !first_map ) { + add_msix_mapping(assigned_device, i); /* Remove old mapping */ ret = xc_domain_memory_mapping(xc_handle, domid, old_ebase >> XC_PAGE_SHIFT, @@ -227,6 +228,9 @@ void pt_iomem_map(PCIDevice *d, int i, u if ( ret != 0 ) PT_LOG("Error: create new mapping failed!\n"); + ret = remove_msix_mapping(assigned_device, i); + if ( ret != 0 ) + PT_LOG("Error: remove MSX-X mmio mapping failed!\n"); } /* Being called each time a pio region has been updated */ @@ -289,6 +293,9 @@ static void pt_pci_write_config(PCIDevic } if ( pt_msi_write(assigned_device, address, val, len) ) + return; + + if ( pt_msix_write(assigned_device, address, val, len) ) return; /* PCI config pass-through */ @@ -338,6 +345,7 @@ static uint32_t pt_pci_read_config(PCIDe } pt_msi_read(assigned_device, address, len, &val); + pt_msix_read(assigned_device, address, len, &val); exit: #ifdef PT_DEBUG_PCI_CONFIG_ACCESS @@ -549,6 +557,9 @@ struct pt_dev * register_real_device(PCI if ( (pos = find_cap_offset(pci_dev, PCI_CAP_ID_MSI)) ) pt_msi_init(assigned_device, pos); + if ( (pos = find_cap_offset(pci_dev, PCI_CAP_ID_MSIX)) ) + pt_msix_init(assigned_device, pos); + /* Handle real device's MMIO/PIO BARs */ pt_register_regions(assigned_device); diff -r 5c00188dd159 -r 4afc6023e8ec tools/ioemu/hw/pass-through.h --- a/tools/ioemu/hw/pass-through.h Mon May 12 10:06:40 2008 +0100 +++ b/tools/ioemu/hw/pass-through.h Mon May 12 10:07:26 2008 +0100 @@ -61,8 +61,26 @@ struct pt_msi_info { uint32_t flags; int offset; int size; - int pvec; /* physical vector used */ int pirq; /* guest pirq corresponding */ +}; + +struct msix_entry_info { + int pirq; /* -1 means unmapped */ + int flags; /* flags indicting whether MSI ADDR or DATA is updated */ + uint32_t io_mem[4]; +}; + +struct pt_msix_info { + int enabled; + int offset; + int total_entries; + int bar_index; + uint32_t table_off; + u64 mmio_base_addr; + int mmio_index; + int fd; + void *phys_iomem_base; + struct msix_entry_info msix_entry[0]; }; /* @@ -74,6 +92,7 @@ struct pt_dev { struct pci_dev *pci_dev; /* libpci struct */ struct pt_region bases[PCI_NUM_REGIONS]; /* Access regions */ struct pt_msi_info *msi; /* MSI virtualization */ + struct pt_msix_info *msix; /* MSI-X virtualization */ }; /* Used for formatting PCI BDF into cf8 format */ diff -r 5c00188dd159 -r 4afc6023e8ec tools/ioemu/hw/pt-msi.c --- a/tools/ioemu/hw/pt-msi.c Mon May 12 10:06:40 2008 +0100 +++ b/tools/ioemu/hw/pt-msi.c Mon May 12 10:07:26 2008 +0100 @@ -20,7 +20,9 @@ */ #include "pt-msi.h" - +#include <sys/mman.h> + +/* MSI virtuailization functions */ #define PT_MSI_CTRL_WR_MASK_HI (0x1) #define PT_MSI_CTRL_WR_MASK_LO (0x8E) #define PT_MSI_DATA_WR_MASK (0x38) @@ -76,7 +78,7 @@ int pt_msi_init(struct pt_dev *dev, int */ static int pt_msi_setup(struct pt_dev *dev) { - int vector = -1, pirq = -1; + int pirq = -1; if ( !(dev->msi->flags & MSI_FLAG_UNINIT) ) { @@ -85,15 +87,15 @@ static int pt_msi_setup(struct pt_dev *d } if ( xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI, - vector, &pirq, + AUTO_ASSIGN, &pirq, dev->pci_dev->dev << 3 | dev->pci_dev->func, - dev->pci_dev->bus, 1) ) - { - PT_LOG("error map vector %x\n", vector); + dev->pci_dev->bus, 0, 1) ) + { + PT_LOG("error map msi\n"); return -1; } dev->msi->pirq = pirq; - PT_LOG("vector %x pirq %x\n", vector, pirq); + PT_LOG("msi mapped with pirq %x\n", pirq); return 0; } @@ -147,15 +149,10 @@ static uint8_t get_msi_gctrl(struct pt_d return *(uint8_t *)(pd->config + d->msi->offset + PCI_MSI_FLAGS); } -static uint32_t get_msi_gflags(struct pt_dev *d) +static uint32_t __get_msi_gflags(uint32_t data, uint64_t addr) { uint32_t result = 0; int rh, dm, dest_id, deliv_mode, trig_mode; - uint16_t data; - uint64_t addr; - - data = get_msi_gdata(d); - addr = get_msi_gaddr(d); rh = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1; dm = (addr >> MSI_ADDR_DESTMODE_SHIFT) & 0x1; @@ -170,25 +167,20 @@ static uint32_t get_msi_gflags(struct pt return result; } +static uint32_t get_msi_gflags(struct pt_dev *d) +{ + uint16_t data = get_msi_gdata(d); + uint64_t addr = get_msi_gaddr(d); + + return __get_msi_gflags(data, addr); +} + /* * This may be arch different */ static inline uint8_t get_msi_gvec(struct pt_dev *d) { return get_msi_gdata(d) & 0xff; -} - -static inline uint8_t get_msi_hvec(struct pt_dev *d) -{ - struct pci_dev *pd = d->pci_dev; - uint16_t data; - - if ( d->msi->flags & PCI_MSI_FLAGS_64BIT ) - data = pci_read_word(pd, PCI_MSI_DATA_64); - else - data = pci_read_word(pd, PCI_MSI_DATA_32); - - return data & 0xff; } /* @@ -198,7 +190,7 @@ static int pt_msi_update(struct pt_dev * static int pt_msi_update(struct pt_dev *d) { PT_LOG("now update msi with pirq %x gvec %x\n", - get_msi_gvec(d), d->msi->pirq); + d->msi->pirq, get_msi_gvec(d)); return xc_domain_update_msi_irq(xc_handle, domid, get_msi_gvec(d), d->msi->pirq, get_msi_gflags(d)); } @@ -266,7 +258,6 @@ static int static int pt_msi_map_update(struct pt_dev *d, uint32_t old_data, uint64_t old_addr) { - uint16_t pctrl; uint32_t data; uint64_t addr; @@ -301,6 +292,8 @@ static int pt_msi_mask_update(struct pt_ if ( old_mask != mask ) pci_write_long(pd, offset, mask); + + return 0; } #define ACCESSED_DATA 0x2 @@ -486,3 +479,378 @@ int pt_msi_read(struct pt_dev *d, int ad return e_len; } +/* MSI-X virtulization functions */ +#define PT_MSIX_CTRL_WR_MASK_HI (0xC0) +static void mask_physical_msix_entry(struct pt_dev *dev, int entry_nr, int mask) +{ + void *phys_off; + + phys_off = dev->msix->phys_iomem_base + 16 * entry_nr + 12; + *(uint32_t *)phys_off = mask; +} + +static int pt_msix_update_one(struct pt_dev *dev, int entry_nr) +{ + struct msix_entry_info *entry = &dev->msix->msix_entry[entry_nr]; + int pirq = entry->pirq; + int gvec = entry->io_mem[2] & 0xff; + uint64_t gaddr = *(uint64_t *)&entry->io_mem[0]; + uint32_t gflags = __get_msi_gflags(entry->io_mem[2], gaddr); + int ret; + + if ( !entry->flags ) + return 0; + + /* Check if this entry is already mapped */ + if ( entry->pirq == -1 ) + { + ret = xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI, + AUTO_ASSIGN, &pirq, + dev->pci_dev->dev << 3 | dev->pci_dev->func, + dev->pci_dev->bus, entry_nr, 0); + if ( ret ) + { + PT_LOG("error map msix entry %x\n", entry_nr); + return ret; + } + entry->pirq = pirq; + } + + PT_LOG("now update msix entry %x with pirq %x gvec %x\n", + entry_nr, pirq, gvec); + + ret = xc_domain_update_msi_irq(xc_handle, domid, gvec, pirq, gflags); + if ( ret ) + { + PT_LOG("error update msix irq info for entry %d\n", entry_nr); + return ret; + } + + entry->flags = 0; + + return 0; +} + +static int pt_msix_update(struct pt_dev *dev) +{ + struct pt_msix_info *msix = dev->msix; + int i; + + for ( i = 0; i < msix->total_entries; i++ ) + { + pt_msix_update_one(dev, i); + } + + return 0; +} + +static void pci_msix_invalid_write(void *opaque, target_phys_addr_t addr, + uint32_t val) +{ + PT_LOG("invalid write to MSI-X table, \ + only dword access is allowed.\n"); +} + +static void pci_msix_writel(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + struct pt_dev *dev = (struct pt_dev *)opaque; + struct pt_msix_info *msix = dev->msix; + struct msix_entry_info *entry; + int entry_nr, offset; + + if ( addr % 4 ) + { + PT_LOG("unaligned dword access to MSI-X table, addr %016lx\n", + addr); + return; + } + + entry_nr = (addr - msix->mmio_base_addr) / 16; + entry = &msix->msix_entry[entry_nr]; + offset = ((addr - msix->mmio_base_addr) % 16) / 4; + + if ( offset != 3 && msix->enabled && entry->io_mem[3] & 0x1 ) + { + PT_LOG("can not update msix entry %d since MSI-X is already \ + function now.\n", entry_nr); + return; + } + + if ( offset != 3 && entry->io_mem[offset] != val ) + entry->flags = 1; + entry->io_mem[offset] = val; + + if ( offset == 3 ) + { + if ( !(val & 0x1) ) + pt_msix_update_one(dev, entry_nr); + mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1); + } +} + +static CPUWriteMemoryFunc *pci_msix_write[] = { + pci_msix_invalid_write, + pci_msix_invalid_write, + pci_msix_writel +}; + +static uint32_t pci_msix_invalid_read(void *opaque, target_phys_addr_t addr) +{ + PT_LOG("invalid read to MSI-X table, \ + only dword access is allowed.\n"); + return 0; +} + +static uint32_t pci_msix_readl(void *opaque, target_phys_addr_t addr) +{ + struct pt_dev *dev = (struct pt_dev *)opaque; + struct pt_msix_info *msix = dev->msix; + int entry_nr, offset; + + if ( addr % 4 ) + { + PT_LOG("unaligned dword access to MSI-X table, addr %016lx\n", + addr); + return 0; + } + + entry_nr = (addr - msix->mmio_base_addr) / 16; + offset = ((addr - msix->mmio_base_addr) % 16) / 4; + + return msix->msix_entry[entry_nr].io_mem[offset]; +} + +static CPUReadMemoryFunc *pci_msix_read[] = { + pci_msix_invalid_read, + pci_msix_invalid_read, + pci_msix_readl +}; + +int add_msix_mapping(struct pt_dev *dev, int bar_index) +{ + if ( !(dev->msix && dev->msix->bar_index == bar_index) ) + return 0; + + return xc_domain_memory_mapping(xc_handle, domid, + dev->msix->mmio_base_addr >> XC_PAGE_SHIFT, + (dev->bases[bar_index].access.maddr + + dev->msix->table_off) >> XC_PAGE_SHIFT, + (dev->msix->total_entries * 16 + + XC_PAGE_SIZE -1) >> XC_PAGE_SHIFT, + DPCI_ADD_MAPPING); +} + +int remove_msix_mapping(struct pt_dev *dev, int bar_index) +{ + if ( !(dev->msix && dev->msix->bar_index == bar_index) ) + return 0; + + dev->msix->mmio_base_addr = dev->bases[bar_index].e_physbase + + dev->msix->table_off; + + cpu_register_physical_memory(dev->msix->mmio_base_addr, + dev->msix->total_entries * 16, + dev->msix->mmio_index); + + return xc_domain_memory_mapping(xc_handle, domid, + dev->msix->mmio_base_addr >> XC_PAGE_SHIFT, + (dev->bases[bar_index].access.maddr + + dev->msix->table_off) >> XC_PAGE_SHIFT, + (dev->msix->total_entries * 16 + + XC_PAGE_SIZE -1) >> XC_PAGE_SHIFT, + DPCI_REMOVE_MAPPING); +} + +int pt_msix_init(struct pt_dev *dev, int pos) +{ + uint8_t id; + uint16_t flags, control; + int i, total_entries, table_off, bar_index; + u64 bar_base; + struct pci_dev *pd = dev->pci_dev; + + id = pci_read_byte(pd, pos + PCI_CAP_LIST_ID); + + if ( id != PCI_CAP_ID_MSIX ) + { + PT_LOG("error id %x pos %x\n", id, pos); + return -1; + } + + control = pci_read_word(pd, pos + 2); + total_entries = control & 0x7ff; + total_entries += 1; + + dev->msix = malloc(sizeof(struct pt_msix_info) + + total_entries*sizeof(struct msix_entry_info)); + if ( !dev->msix ) + { + PT_LOG("error allocation pt_msix_info\n"); + return -1; + } + memset(dev->msix, 0, sizeof(struct pt_msix_info) + + total_entries*sizeof(struct msix_entry_info)); + dev->msix->total_entries = total_entries; + dev->msix->offset = pos; + for ( i = 0; i < total_entries; i++ ) + dev->msix->msix_entry[i].pirq = -1; + + dev->msix->mmio_index = + cpu_register_io_memory(0, pci_msix_read, pci_msix_write, dev); + + flags = pci_read_word(pd, pos + PCI_MSI_FLAGS); + if ( flags & PCI_MSIX_ENABLE ) + { + PT_LOG("MSIX enabled already, disable first\n"); + pci_write_word(pd, pos + PCI_MSI_FLAGS, flags & ~PCI_MSIX_ENABLE); + *(uint16_t *)&dev->dev.config[pos + PCI_MSI_FLAGS] + = flags & ~(PCI_MSIX_ENABLE | PCI_MSIX_MASK); + } + + table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE); + bar_index = dev->msix->bar_index = table_off & PCI_MSIX_BIR; + table_off &= table_off & ~PCI_MSIX_BIR; + bar_base = pci_read_long(pd, 0x10 + 4 * bar_index); + if ( (bar_base & 0x6) == 0x4 ) + { + bar_base &= ~0xf; + bar_base += (u64)pci_read_long(pd, 0x10 + 4 * (bar_index + 1)) << 32; + } + PT_LOG("get MSI-X table bar base %lx\n", bar_base); + + dev->msix->fd = open("/dev/mem", O_RDWR); + dev->msix->phys_iomem_base = mmap(0, total_entries * 16, + PROT_WRITE | PROT_READ, MAP_SHARED | MAP_LOCKED, + dev->msix->fd, bar_base + table_off); + PT_LOG("mapping physical MSI-X table to %lx\n", + (unsigned long)dev->msix->phys_iomem_base); + return 0; +} + +static int pt_msix_enable(struct pt_dev *d, int enable) +{ + uint16_t ctrl; + struct pci_dev *pd = d->pci_dev; + + if ( !pd ) + return -1; + + ctrl = pci_read_word(pd, d->msix->offset + PCI_MSI_FLAGS); + if ( enable ) + ctrl |= PCI_MSIX_ENABLE; + else + ctrl &= ~PCI_MSIX_ENABLE; + pci_write_word(pd, d->msix->offset + PCI_MSI_FLAGS, ctrl); + d->msix->enabled = !!enable; + + return 0; +} + +static int pt_msix_func_mask(struct pt_dev *d, int mask) +{ + uint16_t ctrl; + struct pci_dev *pd = d->pci_dev; + + if ( !pd ) + return -1; + + ctrl = pci_read_word(pd, d->msix->offset + PCI_MSI_FLAGS); + + if ( mask ) + ctrl |= PCI_MSIX_MASK; + else + ctrl &= ~PCI_MSIX_MASK; + + pci_write_word(pd, d->msix->offset + PCI_MSI_FLAGS, ctrl); + return 0; +} + +static int pt_msix_control_update(struct pt_dev *d) +{ + PCIDevice *pd = (PCIDevice *)d; + uint16_t ctrl = *(uint16_t *)(&pd->config[d->msix->offset + 2]); + + if ( ctrl & PCI_MSIX_ENABLE && !(ctrl & PCI_MSIX_MASK ) ) + pt_msix_update(d); + + pt_msix_func_mask(d, ctrl & PCI_MSIX_MASK); + pt_msix_enable(d, ctrl & PCI_MSIX_ENABLE); + + return 0; +} + +int pt_msix_write(struct pt_dev *d, uint32_t addr, uint32_t val, uint32_t len) +{ + struct pci_dev *pd; + int i, cur = addr; + uint8_t value; + PCIDevice *dev = (PCIDevice *)d; + + if ( !d || !d->msix ) + return 0; + + if ( (addr >= (d->msix->offset + 4) ) || + (addr + len) < d->msix->offset) + return 0; + + PT_LOG("addr %x val %x len %x offset %x\n", + addr, val, len, d->msix->offset); + + pd = d->pci_dev; + + for ( i = 0; i < len; i++, cur++ ) + { + uint8_t orig_value; + + if ( cur != d->msix->offset + 3 ) + continue; + + value = (val >> (i * 8)) & 0xff; + + orig_value = pci_read_byte(pd, cur); + value = (orig_value & ~PT_MSIX_CTRL_WR_MASK_HI) | + (value & PT_MSIX_CTRL_WR_MASK_HI); + dev->config[cur] = value; + pt_msix_control_update(d); + return 1; + } + + return 0; +} + +int pt_msix_read(struct pt_dev *d, int addr, int len, uint32_t *val) +{ + int e_addr = addr, e_len = len, offset = 0, i; + uint8_t e_val = 0; + PCIDevice *pd = (PCIDevice *)d; + + if ( !d || !d->msix ) + return 0; + + if ( (addr > (d->msix->offset + 3) ) || + (addr + len) <= d->msix->offset ) + return 0; + + if ( (addr + len ) > (d->msix->offset + 3) ) + e_len -= addr + len - d->msix->offset - 3; + + if ( addr < d->msix->offset ) + { + e_addr = d->msix->offset; + offset = d->msix->offset - addr; + e_len -= offset; + } + + for ( i = 0; i < e_len; i++ ) + { + e_val = *(uint8_t *)(&pd->config[e_addr] + i); + *val &= ~(0xff << ( (offset + i) * 8)); + *val |= (e_val << ( (offset + i) * 8)); + } + + PT_LOG("addr %x len %x val %x offset %x\n", + addr, len, *val, d->msix->offset); + + return e_len; +} + diff -r 5c00188dd159 -r 4afc6023e8ec tools/ioemu/hw/pt-msi.h --- a/tools/ioemu/hw/pt-msi.h Mon May 12 10:06:40 2008 +0100 +++ b/tools/ioemu/hw/pt-msi.h Mon May 12 10:07:26 2008 +0100 @@ -62,4 +62,19 @@ int int pt_msi_read(struct pt_dev *d, int addr, int len, uint32_t *val); +int +remove_msix_mapping(struct pt_dev *dev, int bar_index); + +int +add_msix_mapping(struct pt_dev *dev, int bar_index); + +int +pt_msix_init(struct pt_dev *dev, int pos); + +int +pt_msix_write(struct pt_dev *d, uint32_t addr, uint32_t val, uint32_t len); + +int +pt_msix_read(struct pt_dev *d, int addr, int len, uint32_t *val); + #endif diff -r 5c00188dd159 -r 4afc6023e8ec tools/libxc/xc_physdev.c --- a/tools/libxc/xc_physdev.c Mon May 12 10:06:40 2008 +0100 +++ b/tools/libxc/xc_physdev.c Mon May 12 10:07:26 2008 +0100 @@ -52,6 +52,7 @@ int xc_physdev_map_pirq_msi(int xc_handl int *pirq, int devfn, int bus, + int entry_nr, int msi_type) { int rc; @@ -66,6 +67,7 @@ int xc_physdev_map_pirq_msi(int xc_handl map.pirq = *pirq; map.msi_info.devfn = devfn; map.msi_info.bus = bus; + map.msi_info.entry_nr = entry_nr; map.msi_info.msi = msi_type; rc = do_physdev_op(xc_handle, PHYSDEVOP_map_pirq, &map); diff -r 5c00188dd159 -r 4afc6023e8ec tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon May 12 10:06:40 2008 +0100 +++ b/tools/libxc/xenctrl.h Mon May 12 10:07:26 2008 +0100 @@ -859,6 +859,7 @@ int xc_physdev_map_pirq_msi(int xc_handl int *pirq, int devfn, int bus, + int entry_nr, int msi_type); int xc_physdev_unmap_pirq(int xc_handle, _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |