|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 3/3] qemu-xen: Add 64 bits big bar support on qemu xen
Currently it is assumed that PCI device BAR accesses go to memory below 4G. If
a device has a BAR whose size is larger than 4G, it must access memory
addresses above 4G. This patch enables 64-bit big BAR support in qemu-xen.
Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
Signed-off-by: Xudong Hao <xudong.hao@xxxxxxxxx>
diff --git a/hw/pass-through.c b/hw/pass-through.c
index 6e396e3..9087fa5 100644
--- a/hw/pass-through.c
+++ b/hw/pass-through.c
@@ -1117,13 +1117,13 @@ uint8_t pci_intx(struct pt_dev *ptdev)
}
static int _pt_iomem_helper(struct pt_dev *assigned_device, int i,
- uint32_t e_base, uint32_t e_size, int op)
+ unsigned long e_base, unsigned long e_size, int op)
{
if ( has_msix_mapping(assigned_device, i) )
{
- uint32_t msix_last_pfn = (assigned_device->msix->mmio_base_addr - 1 +
+ unsigned long msix_last_pfn = (assigned_device->msix->mmio_base_addr -
1 +
assigned_device->msix->total_entries * 16) >> XC_PAGE_SHIFT;
- uint32_t bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT;
+ unsigned long bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT;
int ret = 0;
if ( assigned_device->msix->table_off )
@@ -1159,26 +1159,33 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t
e_phys, uint32_t e_size,
int type)
{
struct pt_dev *assigned_device = (struct pt_dev *)d;
- uint32_t old_ebase = assigned_device->bases[i].e_physbase;
+ uint64_t e_phys64 = e_phys, e_size64 = e_size, old_ebase =
assigned_device->bases[i].e_physbase;
int first_map = ( assigned_device->bases[i].e_size == 0 );
+ PCIIORegion *r = &d->io_regions[i];
int ret = 0;
- assigned_device->bases[i].e_physbase = e_phys;
- assigned_device->bases[i].e_size= e_size;
-
- PT_LOG("e_phys=%08x maddr=%lx type=%d len=%d index=%d first_map=%d\n",
- e_phys, (unsigned long)assigned_device->bases[i].access.maddr,
- type, e_size, i, first_map);
-
- if ( e_size == 0 )
+ if ( assigned_device->bases[i + 1].bar_flag == PT_BAR_FLAG_UPPER) {
+ uint64_t upper_addr = (r + 1)->addr;
+ uint64_t upper_size = (r + 1)->size;
+ e_phys64 += upper_addr << 32;
+ e_size64 += upper_size << 32;
+ }
+ PT_LOG("e_phys64=%lx maddr=%lx type=%d len=%lx index=%d first_map=%d\n",
+ e_phys64, (unsigned long)assigned_device->bases[i].access.maddr,
+ type, e_size64, i, first_map);
+
+ if(e_size64== 0 || !valid_addr(e_phys64))
return;
+ assigned_device->bases[i].e_physbase = e_phys64;
+ assigned_device->bases[i].e_size= e_size64;
+
if ( !first_map && old_ebase != -1 )
{
if ( has_msix_mapping(assigned_device, i) )
unregister_iomem(assigned_device->msix->mmio_base_addr);
- ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size,
+ ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size64,
DPCI_REMOVE_MAPPING);
if ( ret != 0 )
{
@@ -1188,7 +1195,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t
e_phys, uint32_t e_size,
}
/* map only valid guest address */
- if (e_phys != -1)
+ if (e_phys64 != -1)
{
if ( has_msix_mapping(assigned_device, i) )
{
@@ -1202,7 +1209,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t
e_phys, uint32_t e_size,
assigned_device->msix->mmio_index);
}
- ret = _pt_iomem_helper(assigned_device, i, e_phys, e_size,
+ ret = _pt_iomem_helper(assigned_device, i, e_phys64, e_size64,
DPCI_ADD_MAPPING);
if ( ret != 0 )
{
@@ -1210,7 +1217,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t
e_phys, uint32_t e_size,
return;
}
- if ( old_ebase != e_phys && old_ebase != -1 )
+ if ( old_ebase != e_phys64 && old_ebase != -1 )
pt_msix_update_remap(assigned_device, i);
}
}
@@ -1853,7 +1860,7 @@ exit:
static void pt_libpci_fixup(struct pci_dev *dev)
{
-#if !defined(PCI_LIB_VERSION) || PCI_LIB_VERSION < 0x030100
+#if !defined(PCI_LIB_VERSION) || PCI_LIB_VERSION <= 0x030100
int i;
FILE *fp;
char path[PATH_MAX], buf[256];
@@ -1907,7 +1914,7 @@ static int pt_dev_is_virtfn(struct pci_dev *dev)
static int pt_register_regions(struct pt_dev *assigned_device)
{
- int i = 0;
+ int i = 0, current_bar, bar_flag;
uint32_t bar_data = 0;
struct pci_dev *pci_dev = assigned_device->pci_dev;
PCIDevice *d = &assigned_device->dev;
@@ -1916,6 +1923,7 @@ static int pt_register_regions(struct pt_dev
*assigned_device)
/* Register PIO/MMIO BARs */
for ( i = 0; i < PCI_BAR_ENTRIES; i++ )
{
+ current_bar = i;
if ( pt_pci_base_addr(pci_dev->base_addr[i]) )
{
assigned_device->bases[i].e_physbase =
@@ -1928,18 +1936,26 @@ static int pt_register_regions(struct pt_dev
*assigned_device)
pci_register_io_region((PCIDevice *)assigned_device, i,
(uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_IO,
pt_ioport_map);
- else if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_MEM_PREFETCH )
+ else if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_MEM_64BIT) {
+ bar_flag = pci_dev->base_addr[i] & 0xf;
pci_register_io_region((PCIDevice *)assigned_device, i,
- (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM_PREFETCH,
+ (uint32_t)pci_dev->size[i], bar_flag,
pt_iomem_map);
- else
- pci_register_io_region((PCIDevice *)assigned_device, i,
- (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM,
+ pci_register_io_region((PCIDevice *)assigned_device, i + 1,
+ (uint32_t)(pci_dev->size[i] >> 32), PCI_ADDRESS_SPACE_MEM,
pt_iomem_map);
-
- PT_LOG("IO region registered (size=0x%08x base_addr=0x%08x)\n",
- (uint32_t)(pci_dev->size[i]),
- (uint32_t)(pci_dev->base_addr[i]));
+ /* skip upper half. */
+ i++;
+ }
+ else {
+ bar_flag = pci_dev->base_addr[i] & 0xf;
+ pci_register_io_region((PCIDevice *)assigned_device, i,
+ (uint32_t)(pci_dev->size[i]), bar_flag,
+ pt_iomem_map);
+ }
+ PT_LOG("IO region registered (bar:%d,size=0x%lx
base_addr=0x%lx)\n", current_bar,
+ (pci_dev->size[current_bar]),
+ (pci_dev->base_addr[current_bar]));
}
}
@@ -1984,7 +2000,7 @@ static void pt_unregister_regions(struct pt_dev
*assigned_device)
type = d->io_regions[i].type;
- if ( type == PCI_ADDRESS_SPACE_MEM ||
+ if ( type == PCI_ADDRESS_SPACE_MEM || type ==
PCI_ADDRESS_SPACE_MEM_64BIT ||
type == PCI_ADDRESS_SPACE_MEM_PREFETCH )
{
ret = _pt_iomem_helper(assigned_device, i,
@@ -2117,6 +2133,7 @@ int pt_pci_host_write(struct pci_dev *pci_dev, u32 addr,
u32 val, int len)
return ret;
}
+static uint64_t pt_get_bar_size(PCIIORegion *r);
/* parse BAR */
static int pt_bar_reg_parse(
struct pt_dev *ptdev, struct pt_reg_info_tbl *reg)
@@ -2145,7 +2162,7 @@ static int pt_bar_reg_parse(
/* check unused BAR */
r = &d->io_regions[index];
- if (!r->size)
+ if (!pt_get_bar_size(r))
goto out;
/* for ExpROM BAR */
@@ -2165,6 +2182,86 @@ out:
return bar_flag;
}
+static bool is_64bit_bar(PCIIORegion *r)
+{
+ return !!(r->type & PCI_ADDRESS_SPACE_MEM_64BIT);
+}
+
+static uint64_t pt_get_bar_size(PCIIORegion *r)
+{
+ if (is_64bit_bar(r))
+ {
+ uint64_t size64;
+ size64 = (r + 1)->size;
+ size64 <<= 32;
+ size64 += r->size;
+ return size64;
+ }
+ return r->size;
+}
+
+static uint64_t pt_get_bar_base(PCIIORegion *r)
+{
+ if (is_64bit_bar(r))
+ {
+ uint64_t base64;
+
+ base64 = (r + 1)->addr;
+ base64 <<= 32;
+ base64 += r->addr;
+ return base64;
+ }
+ return r->addr;
+}
+
+int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint64_t addr,
+ uint64_t size, uint8_t type)
+{
+ PCIDevice *devices = NULL;
+ PCIIORegion *r;
+ int ret = 0;
+ int i, j;
+
+ /* check Overlapped to Base Address */
+ for (i=0; i<256; i++)
+ {
+ if ( !(devices = bus->devices[i]) )
+ continue;
+
+ /* skip itself */
+ if (devices->devfn == devfn)
+ continue;
+
+ for (j=0; j<PCI_NUM_REGIONS; j++)
+ {
+ r = &devices->io_regions[j];
+
+ /* skip different resource type, but don't skip when
+ * prefetch and non-prefetch memory are compared.
+ */
+ if (type != r->type)
+ {
+ if (type == PCI_ADDRESS_SPACE_IO ||
+ r->type == PCI_ADDRESS_SPACE_IO)
+ continue;
+ }
+
+ if ((addr < (pt_get_bar_base(r) + pt_get_bar_size(r))) && ((addr +
size) > pt_get_bar_base(r)))
+ {
+ printf("Overlapped to device[%02x:%02x.%x][Region:%d]"
+ "[Address:%lxh][Size:%lxh]\n", bus->bus_num,
+ (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7),
+ j, pt_get_bar_base(r), pt_get_bar_size(r));
+ ret = 1;
+ goto out;
+ }
+ }
+ }
+
+out:
+ return ret;
+}
+
/* mapping BAR */
static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable,
int mem_enable)
@@ -2174,13 +2271,13 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev,
int bar, int io_enable,
struct pt_reg_grp_tbl *reg_grp_entry = NULL;
struct pt_reg_tbl *reg_entry = NULL;
struct pt_region *base = NULL;
- uint32_t r_size = 0, r_addr = -1;
+ uint64_t r_size = 0, r_addr = -1;
int ret = 0;
r = &dev->io_regions[bar];
-
+
/* check valid region */
- if (!r->size)
+ if (!pt_get_bar_size(r))
return;
base = &ptdev->bases[bar];
@@ -2190,12 +2287,13 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev,
int bar, int io_enable,
return;
/* copy region address to temporary */
- r_addr = r->addr;
+ r_addr = pt_get_bar_base(r);
/* need unmapping in case I/O Space or Memory Space disable */
if (((base->bar_flag == PT_BAR_FLAG_IO) && !io_enable ) ||
((base->bar_flag == PT_BAR_FLAG_MEM) && !mem_enable ))
r_addr = -1;
+
if ( (bar == PCI_ROM_SLOT) && (r_addr != -1) )
{
reg_grp_entry = pt_find_reg_grp(ptdev, PCI_ROM_ADDRESS);
@@ -2208,26 +2306,27 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev,
int bar, int io_enable,
}
/* prevent guest software mapping memory resource to 00000000h */
- if ((base->bar_flag == PT_BAR_FLAG_MEM) && (r_addr == 0))
+ if ((base->bar_flag == PT_BAR_FLAG_MEM) && (pt_get_bar_base(r) == 0))
r_addr = -1;
/* align resource size (memory type only) */
- r_size = r->size;
+ r_size = pt_get_bar_size(r);
PT_GET_EMUL_SIZE(base->bar_flag, r_size);
/* check overlapped address */
ret = pt_chk_bar_overlap(dev->bus, dev->devfn,
r_addr, r_size, r->type);
if (ret > 0)
- PT_LOG_DEV(dev, "Warning: [Region:%d][Address:%08xh]"
- "[Size:%08xh] is overlapped.\n", bar, r_addr, r_size);
+ PT_LOG("Warning: ptdev[%02x:%02x.%x][Region:%d][Address:%lxh]"
+ "[Size:%lxh] is overlapped.\n", pci_bus_num(dev->bus),
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), bar, r_addr, r_size);
/* check whether we need to update the mapping or not */
if (r_addr != ptdev->bases[bar].e_physbase)
{
/* mapping BAR */
- r->map_func((PCIDevice *)ptdev, bar, r_addr,
- r_size, r->type);
+ r->map_func((PCIDevice *)ptdev, bar, (uint32_t)r_addr,
+ (uint32_t)r_size, r->type);
}
}
@@ -2823,7 +2922,7 @@ static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
}
/* set initial guest physical base address to -1 */
- ptdev->bases[index].e_physbase = -1;
+ ptdev->bases[index].e_physbase = -1UL;
/* set BAR flag */
ptdev->bases[index].bar_flag = pt_bar_reg_parse(ptdev, reg);
@@ -3506,7 +3605,10 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
{
case PT_BAR_FLAG_MEM:
bar_emu_mask = PT_BAR_MEM_EMU_MASK;
- bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1);
+ if (!r_size)
+ bar_ro_mask = PT_BAR_ALLF;
+ else
+ bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1);
break;
case PT_BAR_FLAG_IO:
bar_emu_mask = PT_BAR_IO_EMU_MASK;
@@ -3514,7 +3616,10 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
break;
case PT_BAR_FLAG_UPPER:
bar_emu_mask = PT_BAR_ALLF;
- bar_ro_mask = 0; /* all upper 32bit are R/W */
+ if (!r_size)
+ bar_ro_mask = 0;
+ else
+ bar_ro_mask = r_size - 1;
break;
default:
break;
@@ -3527,6 +3632,7 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
/* check whether we need to update the virtual region address or not */
switch (ptdev->bases[index].bar_flag)
{
+ case PT_BAR_FLAG_UPPER:
case PT_BAR_FLAG_MEM:
/* nothing to do */
break;
@@ -3550,42 +3656,6 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
goto exit;
}
break;
- case PT_BAR_FLAG_UPPER:
- if (cfg_entry->data)
- {
- if (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask))
- {
- PT_LOG_DEV(d, "Warning: Guest attempt to set high MMIO Base
Address. "
- "Ignore mapping. "
- "[Offset:%02xh][High Address:%08xh]\n",
- reg->offset, cfg_entry->data);
- }
- /* clear lower address */
- d->io_regions[index-1].addr = -1;
- }
- else
- {
- /* find lower 32bit BAR */
- prev_offset = (reg->offset - 4);
- reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset);
- if (reg_grp_entry)
- {
- reg_entry = pt_find_reg(reg_grp_entry, prev_offset);
- if (reg_entry)
- /* restore lower address */
- d->io_regions[index-1].addr = reg_entry->data;
- else
- return -1;
- }
- else
- return -1;
- }
-
- /* never mapping the 'empty' upper region,
- * because we'll do it enough for the lower region.
- */
- r->addr = -1;
- goto exit;
default:
break;
}
@@ -3599,7 +3669,7 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
* rather than mmio. Remapping this value to mmio should be prevented.
*/
- if ( cfg_entry->data != writable_mask )
+ if ( cfg_entry->data != writable_mask || !cfg_entry->data)
r->addr = cfg_entry->data;
exit:
diff --git a/hw/pass-through.h b/hw/pass-through.h
index d7d837c..b651192 100644
--- a/hw/pass-through.h
+++ b/hw/pass-through.h
@@ -158,10 +158,13 @@ enum {
#define PT_MERGE_VALUE(value, data, val_mask) \
(((value) & (val_mask)) | ((data) & ~(val_mask)))
+#define valid_addr(addr) \
+ (addr >= 0x80000000 && !(addr & 0xfff))
+
struct pt_region {
/* Virtual phys base & size */
- uint32_t e_physbase;
- uint32_t e_size;
+ uint64_t e_physbase;
+ uint64_t e_size;
/* Index of region in qemu */
uint32_t memory_index;
/* BAR flag */
diff --git a/hw/pci.c b/hw/pci.c
index f051de1..839863d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -39,24 +39,6 @@ extern int igd_passthru;
//#define DEBUG_PCI
-struct PCIBus {
- int bus_num;
- int devfn_min;
- pci_set_irq_fn set_irq;
- pci_map_irq_fn map_irq;
- uint32_t config_reg; /* XXX: suppress */
- /* low level pic */
- SetIRQFunc *low_set_irq;
- qemu_irq *irq_opaque;
- PCIDevice *devices[256];
- PCIDevice *parent_dev;
- PCIBus *next;
- /* The bus IRQ state is the logical OR of the connected devices.
- Keep a count of the number of devices with raised IRQs. */
- int nirq;
- int irq_count[];
-};
-
static void pci_update_mappings(PCIDevice *d);
static void pci_set_irq(void *opaque, int irq_num, int level);
@@ -938,50 +920,3 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t
vid, uint16_t did,
return s->bus;
}
-int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint32_t addr,
- uint32_t size, uint8_t type)
-{
- PCIDevice *devices = NULL;
- PCIIORegion *r;
- int ret = 0;
- int i, j;
-
- /* check Overlapped to Base Address */
- for (i=0; i<256; i++)
- {
- if ( !(devices = bus->devices[i]) )
- continue;
-
- /* skip itself */
- if (devices->devfn == devfn)
- continue;
-
- for (j=0; j<PCI_NUM_REGIONS; j++)
- {
- r = &devices->io_regions[j];
-
- /* skip different resource type, but don't skip when
- * prefetch and non-prefetch memory are compared.
- */
- if (type != r->type)
- {
- if (type == PCI_ADDRESS_SPACE_IO ||
- r->type == PCI_ADDRESS_SPACE_IO)
- continue;
- }
-
- if ((addr < (r->addr + r->size)) && ((addr + size) > r->addr))
- {
- printf("Overlapped to device[%02x:%02x.%x][Region:%d]"
- "[Address:%08xh][Size:%08xh]\n", bus->bus_num,
- (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7),
- j, r->addr, r->size);
- ret = 1;
- goto out;
- }
- }
- }
-
-out:
- return ret;
-}
diff --git a/hw/pci.h b/hw/pci.h
index edc58b6..a036cc3 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -137,6 +137,7 @@ typedef int PCIUnregisterFunc(PCIDevice *pci_dev);
#define PCI_ADDRESS_SPACE_MEM 0x00
#define PCI_ADDRESS_SPACE_IO 0x01
+#define PCI_ADDRESS_SPACE_MEM_64BIT 0x04
#define PCI_ADDRESS_SPACE_MEM_PREFETCH 0x08
typedef struct PCIIORegion {
@@ -240,8 +241,8 @@ void pci_register_io_region(PCIDevice *pci_dev, int
region_num,
uint32_t size, int type,
PCIMapIORegionFunc *map_func);
-int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint32_t addr,
- uint32_t size, uint8_t type);
+int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint64_t addr,
+ uint64_t size, uint8_t type);
uint32_t pci_default_read_config(PCIDevice *d,
uint32_t address, int len);
@@ -360,5 +361,23 @@ void pci_bridge_write_config(PCIDevice *d,
uint32_t address, uint32_t val, int len);
PCIBus *pci_register_secondary_bus(PCIDevice *dev, pci_map_irq_fn map_irq);
+struct PCIBus {
+ int bus_num;
+ int devfn_min;
+ pci_set_irq_fn set_irq;
+ pci_map_irq_fn map_irq;
+ uint32_t config_reg; /* XXX: suppress */
+ /* low level pic */
+ SetIRQFunc *low_set_irq;
+ qemu_irq *irq_opaque;
+ PCIDevice *devices[256];
+ PCIDevice *parent_dev;
+ PCIBus *next;
+ /* The bus IRQ state is the logical OR of the connected devices.
+ Keep a count of the number of devices with raised IRQs. */
+ int nirq;
+ int irq_count[];
+};
+
#endif
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |