[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH] Xen: Use the ioreq-server API when available
The ioreq-server API added to Xen 4.5 offers better security than the existing Xen/QEMU interface because the shared pages that are used to pass emulation request/results back and forth are removed from the guest's memory space before any requests are serviced. This prevents the guest from mapping these pages (they are in a well-known location) and attempting to attack QEMU by synthesizing its own request structures. Hence, this patch modifies configure to detect whether the API is available, and adds the necessary code to use the API if it is. The ioreq-server API does require that PCI device models explicitly register with Xen for config space accesses, so to use the API the code in xen-hvm.c needs to be informed as PCI devices are added or removed from PCI buses. This patch therefore also adds a PCI bus listener interface akin to the memory listener interface to fulfil this need. Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx> Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx> Cc: Peter Maydell <peter.maydell@xxxxxxxxxx> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Michael Tokarev <mjt@xxxxxxxxxx> Cc: Stefan Hajnoczi <stefanha@xxxxxxxxxx> Cc: Stefan Weil <sw@xxxxxxxxxxx> Cc: Andreas Faerber <afaerber@xxxxxxx> Cc: Thomas Huth <thuth@xxxxxxxxxxxxxxxxxx> Cc: Peter Crosthwaite <peter.crosthwaite@xxxxxxxxxx> Cc: Eduardo Habkost <ehabkost@xxxxxxxxxx> Cc: Gerd Hoffmann <kraxel@xxxxxxxxxx> Cc: Alexey Kardashevskiy <aik@xxxxxxxxx> Cc: Alexander Graf <agraf@xxxxxxx> --- configure | 29 ++++++ hw/pci/pci.c | 65 ++++++++++++ include/hw/pci/pci.h | 9 ++ include/qemu/typedefs.h | 1 + trace-events | 8 ++ xen-hvm.c | 251 +++++++++++++++++++++++++++++++++++++++++++---- 6 files changed, 345 insertions(+), 18 deletions(-) diff --git a/configure b/configure index 9ac2600..c2db574 100755 --- a/configure +++ b/configure @@ -1876,6 +1876,32 @@ int main(void) { xc_gnttab_open(NULL, 0); xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 
0, 0); xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + xc_hvm_create_ioreq_server(xc, 0, 0, NULL); + return 0; +} +EOF + compile_prog "" "$xen_libs" + then + xen_ctrl_version=450 + xen=yes + + elif + cat > $TMPC <<EOF && +#include <xenctrl.h> +#include <xenstore.h> +#include <stdint.h> +#include <xen/hvm/hvm_info_table.h> +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif +int main(void) { + xc_interface *xc; + xs_daemon_open(); + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_gnttab_open(NULL, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); return 0; } EOF @@ -4282,6 +4308,9 @@ if test -n "$sparc_cpu"; then echo "Target Sparc Arch $sparc_cpu" fi echo "xen support $xen" +if test "$xen" = "yes" ; then + echo "xen ctrl version $xen_ctrl_version" +fi echo "brlapi support $brlapi" echo "bluez support $bluez" echo "Documentation $docs" diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 6ce75aa..53c955d 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -122,6 +122,66 @@ static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU; static QLIST_HEAD(, PCIHostState) pci_host_bridges; +static QTAILQ_HEAD(pci_listeners, PCIListener) pci_listeners + = QTAILQ_HEAD_INITIALIZER(pci_listeners); + +enum ListenerDirection { Forward, Reverse }; + +#define PCI_LISTENER_CALL(_callback, _direction, _args...) 
\ + do { \ + PCIListener *_listener; \ + \ + switch (_direction) { \ + case Forward: \ + QTAILQ_FOREACH(_listener, &pci_listeners, link) { \ + if (_listener->_callback) { \ + _listener->_callback(_listener, ##_args); \ + } \ + } \ + break; \ + case Reverse: \ + QTAILQ_FOREACH_REVERSE(_listener, &pci_listeners, \ + pci_listeners, link) { \ + if (_listener->_callback) { \ + _listener->_callback(_listener, ##_args); \ + } \ + } \ + break; \ + default: \ + abort(); \ + } \ + } while (0) + +static int pci_listener_add(DeviceState *dev, void *opaque) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + PCIDevice *pci_dev = PCI_DEVICE(dev); + + PCI_LISTENER_CALL(device_add, Forward, pci_dev); + } + + return 0; +} + +void pci_listener_register(PCIListener *listener) +{ + PCIHostState *host; + + QTAILQ_INSERT_TAIL(&pci_listeners, listener, link); + + QLIST_FOREACH(host, &pci_host_bridges, next) { + PCIBus *bus = host->bus; + + qbus_walk_children(&bus->qbus, NULL, NULL, pci_listener_add, + NULL, NULL); + } +} + +void pci_listener_unregister(PCIListener *listener) +{ + QTAILQ_REMOVE(&pci_listeners, listener, link); +} + static int pci_bar(PCIDevice *d, int reg) { uint8_t type; @@ -795,6 +855,8 @@ static void pci_config_free(PCIDevice *pci_dev) static void do_pci_unregister_device(PCIDevice *pci_dev) { + PCI_LISTENER_CALL(device_del, Reverse, pci_dev); + pci_dev->bus->devices[pci_dev->devfn] = NULL; pci_config_free(pci_dev); @@ -878,6 +940,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, pci_dev->config_write = config_write; bus->devices[devfn] = pci_dev; pci_dev->version_id = 2; /* Current pci device vmstate version */ + + PCI_LISTENER_CALL(device_add, Forward, pci_dev); + return pci_dev; } diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index c352c7b..6c21b37 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -303,6 +303,15 @@ struct PCIDevice { MSIVectorPollNotifier msix_vector_poll_notifier; }; +struct 
PCIListener { + void (*device_add)(PCIListener *listener, PCIDevice *pci_dev); + void (*device_del)(PCIListener *listener, PCIDevice *pci_dev); + QTAILQ_ENTRY(PCIListener) link; +}; + +void pci_listener_register(PCIListener *listener); +void pci_listener_unregister(PCIListener *listener); + void pci_register_bar(PCIDevice *pci_dev, int region_num, uint8_t attr, MemoryRegion *memory); void pci_register_vga(PCIDevice *pci_dev, MemoryRegion *mem, diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 04df51b..2b974c6 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -54,6 +54,7 @@ typedef struct PCIHostState PCIHostState; typedef struct PCIExpressHost PCIExpressHost; typedef struct PCIBus PCIBus; typedef struct PCIDevice PCIDevice; +typedef struct PCIListener PCIListener; typedef struct PCIExpressDevice PCIExpressDevice; typedef struct PCIBridge PCIBridge; typedef struct PCIEAERMsg PCIEAERMsg; diff --git a/trace-events b/trace-events index 011d105..3efcff7 100644 --- a/trace-events +++ b/trace-events @@ -895,6 +895,14 @@ pvscsi_tx_rings_num_pages(const char* label, uint32_t num) "Number of %s pages: # xen-hvm.c xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx" xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty) "%#"PRIx64" size %#lx, log_dirty %i" +xen_ioreq_server_create(uint32_t id) "id: %u" +xen_ioreq_server_destroy(uint32_t id) "id: %u" +xen_map_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 +xen_unmap_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 +xen_map_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 +xen_unmap_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: %#"PRIx64" end: %#"PRIx64 +xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u 
bdf: %02x.%02x.%02x" +xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x" # xen-mapcache.c xen_map_cache(uint64_t phys_addr) "want %#"PRIx64 diff --git a/xen-hvm.c b/xen-hvm.c index 05e522c..031edd9 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -78,6 +78,7 @@ typedef struct XenPhysmap { } XenPhysmap; typedef struct XenIOState { + ioservid_t ioservid; shared_iopage_t *shared_page; buffered_iopage_t *buffered_io_page; QEMUTimer *buffered_io_timer; @@ -92,6 +93,8 @@ typedef struct XenIOState { struct xs_handle *xenstore; MemoryListener memory_listener; + MemoryListener io_listener; + PCIListener pci_listener; QLIST_HEAD(, XenPhysmap) physmap; hwaddr free_phys_offset; const XenPhysmap *log_for_dirtybit; @@ -480,6 +483,20 @@ static void xen_region_add(MemoryListener *listener, MemoryRegionSection *section) { memory_region_ref(section->mr); + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 + if (section->mr != &ram_memory) { + XenIOState *state = container_of(listener, XenIOState, memory_listener); + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + trace_xen_map_mmio_range(state->ioservid, start_addr, end_addr); + xc_hvm_map_io_range_to_ioreq_server(xen_xc, xen_domid, state->ioservid, + 1, start_addr, end_addr); + } +#endif + xen_set_memory(listener, section, true); } @@ -487,9 +504,81 @@ static void xen_region_del(MemoryListener *listener, MemoryRegionSection *section) { xen_set_memory(listener, section, false); + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 + if (section->mr != &ram_memory) { + XenIOState *state = container_of(listener, XenIOState, memory_listener); + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + trace_xen_unmap_mmio_range(state->ioservid, start_addr, end_addr); + 
xc_hvm_unmap_io_range_from_ioreq_server(xen_xc, xen_domid, state->ioservid, + 1, start_addr, end_addr); + } +#endif + + memory_region_unref(section->mr); +} + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 +static void xen_io_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + XenIOState *state = container_of(listener, XenIOState, io_listener); + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + trace_xen_map_portio_range(state->ioservid, start_addr, end_addr); + xc_hvm_map_io_range_to_ioreq_server(xen_xc, xen_domid, state->ioservid, + 0, start_addr, end_addr); + + memory_region_ref(section->mr); +} + +static void xen_io_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + XenIOState *state = container_of(listener, XenIOState, io_listener); + hwaddr start_addr = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + hwaddr end_addr = start_addr + size - 1; + + trace_xen_unmap_portio_range(state->ioservid, start_addr, end_addr); + xc_hvm_unmap_io_range_from_ioreq_server(xen_xc, xen_domid, state->ioservid, + 0, start_addr, end_addr); + memory_region_unref(section->mr); } +static void xen_pci_add(PCIListener *listener, + PCIDevice *pci_dev) +{ + XenIOState *state = container_of(listener, XenIOState, pci_listener); + + trace_xen_map_pcidev(state->ioservid, pci_bus_num(pci_dev->bus), + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); + xc_hvm_map_pcidev_to_ioreq_server(xen_xc, xen_domid, state->ioservid, + 0, pci_bus_num(pci_dev->bus), + PCI_SLOT(pci_dev->devfn), + PCI_FUNC(pci_dev->devfn)); +} + +static void xen_pci_del(PCIListener *listener, + PCIDevice *pci_dev) +{ + XenIOState *state = container_of(listener, XenIOState, pci_listener); + + trace_xen_unmap_pcidev(state->ioservid, pci_bus_num(pci_dev->bus), + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); + 
xc_hvm_unmap_pcidev_from_ioreq_server(xen_xc, xen_domid, state->ioservid, + 0, pci_bus_num(pci_dev->bus), + PCI_SLOT(pci_dev->devfn), + PCI_FUNC(pci_dev->devfn)); +} +#endif + static void xen_sync_dirty_bitmap(XenIOState *state, hwaddr start_addr, ram_addr_t size) @@ -590,6 +679,19 @@ static MemoryListener xen_memory_listener = { .priority = 10, }; +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 +static MemoryListener xen_io_listener = { + .region_add = xen_io_add, + .region_del = xen_io_del, + .priority = 10, +}; + +static PCIListener xen_pci_listener = { + .device_add = xen_pci_add, + .device_del = xen_pci_del, +}; +#endif + /* get the ioreq packets from share mem */ static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu) { @@ -792,6 +894,29 @@ static void handle_ioreq(ioreq_t *req) case IOREQ_TYPE_INVALIDATE: xen_invalidate_map_cache(); break; +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 + case IOREQ_TYPE_PCI_CONFIG: { + uint32_t sbdf = req->addr >> 32; + uint32_t val; + + /* Fake a write to port 0xCF8 so that + * the config space access will target the + * correct device model. 
+ */ + val = (1u << 31) | + ((req->addr & 0x0f00) << 16) | + ((sbdf & 0xffff) << 8) | + (req->addr & 0xfc); + do_outp(0xcf8, 4, val); + + /* Now issue the config space access via + * port 0xCFC + */ + req->addr = 0xcfc | (req->addr & 0x03); + cpu_ioreq_pio(req); + break; + } +#endif default: hw_error("Invalid ioreq type 0x%x\n", req->type); } @@ -979,13 +1104,39 @@ static void xen_wakeup_notifier(Notifier *notifier, void *data) xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0); } +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 +static void xen_hvm_pre_save(void *opaque) +{ + XenIOState *state = opaque; + + /* Stop servicing emulation requests */ + trace_xen_ioreq_server_destroy(state->ioservid); + xc_hvm_set_ioreq_server_state(xen_xc, xen_domid, state->ioservid, 0); + xc_hvm_destroy_ioreq_server(xen_xc, xen_domid, state->ioservid); +} + +static const VMStateDescription vmstate_xen_hvm = { + .name = "xen-hvm", + .version_id = 4, + .minimum_version_id = 4, + .pre_save = xen_hvm_pre_save, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + }, +}; +#endif + /* return 0 means OK, or -1 means critical issue -- will exit(1) */ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, MemoryRegion **ram_memory) { int i, rc; - unsigned long ioreq_pfn; - unsigned long bufioreq_evtchn; +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 450 + unsigned long param; +#endif + xen_pfn_t ioreq_pfn; + xen_pfn_t bufioreq_pfn; + evtchn_port_t bufioreq_evtchn; XenIOState *state; state = g_malloc0(sizeof (XenIOState)); @@ -1002,6 +1153,16 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, return -1; } +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 + rc = xc_hvm_create_ioreq_server(xen_xc, xen_domid, 1, &state->ioservid); + if (rc < 0) { + perror("xen: ioreq server create"); + return -1; + } + + trace_xen_ioreq_server_create(state->ioservid); +#endif + state->exit.notify = xen_exit_notifier; qemu_add_exit_notifier(&state->exit); @@ 
-1011,23 +1172,71 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, state->wakeup.notify = xen_wakeup_notifier; qemu_register_wakeup_notifier(&state->wakeup); - xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn); +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 + rc = xc_hvm_get_ioreq_server_info(xen_xc, xen_domid, state->ioservid, + &ioreq_pfn, &bufioreq_pfn, + &bufioreq_evtchn); + if (rc < 0) { + hw_error("failed to get ioreq server info: error %d handle=" XC_INTERFACE_FMT, + errno, xen_xc); + } +#else + rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &param); + if (rc < 0) { + fprintf(stderr, "failed to get HVM_PARAM_IOREQ_PFN\n"); + return -1; + } + + ioreq_pfn = param; + + rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &param); + if (rc < 0) { + fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_PFN\n"); + return -1; + } + + bufioreq_pfn = param; + + rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_EVTCHN, + &param); + if (rc < 0) { + fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n"); + return -1; + } + + bufioreq_evtchn = param; +#endif + DPRINTF("shared page at pfn %lx\n", ioreq_pfn); - state->shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, - PROT_READ|PROT_WRITE, ioreq_pfn); + DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn); + DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn); + + state->shared_page = xc_map_foreign_range(xen_xc, xen_domid, + XC_PAGE_SIZE, + PROT_READ|PROT_WRITE, + ioreq_pfn); if (state->shared_page == NULL) { hw_error("map shared IO page returned error %d handle=" XC_INTERFACE_FMT, errno, xen_xc); } - xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn); - DPRINTF("buffered io page at pfn %lx\n", ioreq_pfn); - state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, - PROT_READ|PROT_WRITE, ioreq_pfn); + state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, + 
XC_PAGE_SIZE, + PROT_READ|PROT_WRITE, + bufioreq_pfn); if (state->buffered_io_page == NULL) { hw_error("map buffered IO page returned error %d", errno); } +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 + rc = xc_hvm_set_ioreq_server_state(xen_xc, xen_domid, state->ioservid, + 1); + if (rc < 0) { + hw_error("failed to enable ioreq server info: error %d handle=" XC_INTERFACE_FMT, + errno, xen_xc); + } +#endif + state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t)); /* FIXME: how about if we overflow the page here? */ @@ -1035,22 +1244,16 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid, xen_vcpu_eport(state->shared_page, i)); if (rc == -1) { - fprintf(stderr, "bind interdomain ioctl error %d\n", errno); + fprintf(stderr, "shared evtchn %d bind error %d\n", i, errno); return -1; } state->ioreq_local_port[i] = rc; } - rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_EVTCHN, - &bufioreq_evtchn); - if (rc < 0) { - fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n"); - return -1; - } rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid, - (uint32_t)bufioreq_evtchn); + bufioreq_evtchn); if (rc == -1) { - fprintf(stderr, "bind interdomain ioctl error %d\n", errno); + fprintf(stderr, "buffered evtchn bind error %d\n", errno); return -1; } state->bufioreq_local_port = rc; @@ -1061,11 +1264,23 @@ int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state); +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 + vmstate_register(NULL, 0, &vmstate_xen_hvm, state); +#endif + state->memory_listener = xen_memory_listener; QLIST_INIT(&state->physmap); memory_listener_register(&state->memory_listener, &address_space_memory); state->log_for_dirtybit = NULL; +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 450 + state->io_listener = xen_io_listener; + 
memory_listener_register(&state->io_listener, &address_space_io); + + state->pci_listener = xen_pci_listener; + pci_listener_register(&state->pci_listener); +#endif + /* Initialize backend core & drivers */ if (xen_be_init() != 0) { fprintf(stderr, "%s: xen backend core setup failed\n", __FUNCTION__); -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |