|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH RFC 2/2] vhost-user: implement VHOST_USER_PROTOCOL_F_XEN_MMAP
From: Dusan Stojkovic <Dusan.Stojkovic@xxxxxxxxx>
The vhost-user specification reserves protocol feature bit 17 and
documents an extended memory region description for backends that map
guest memory through Xen rather than by mapping a file descriptor:
each region carries two extra fields, "xen mmap flags" and "domid" (see
docs/interop/vhost-user.rst, "Memory region description").
The layout is implemented by rust-vmm's vhost and vm-memory crates
and used by Xen vhost-user device backends.
Implement the front-end side for foreign mappings:
- negotiate VHOST_USER_PROTOCOL_F_XEN_MMAP
- when negotiated, build SET_MEM_TABLE payloads from the extended
region layout, with xen_mmap_flags = FOREIGN and
xen_mmap_data set to the guest's domain id.
- under Xen, do not call vhost_user_get_mr_data(): guest RAM has no fd
and its userspace_addr does not correspond to a valid mapping in
QEMU's address space. Backends map regions through privcmd using the guest
physical address and domid; the fd accompanying each region only
satisfies the protocol's one-fd-per-region requirement. Pass a
/dev/xen/privcmd fd and close it once the message has been sent.
Tracepoints for opening and closing xen fds are added as well.
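- suppress VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS under Xen: the
ADD/REM_MEM_REG message path has not been adapted to the extended
region layout, so the front-end falls back to SET_MEM_TABLE.
Postcopy is likewise refused.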
The userspace_addr field is carried unchanged; Xen backends derive
mappings from guest_phys_addr and domid and do not interpret it.
Signed-off-by: Dusan Stojkovic <Dusan.Stojkovic@xxxxxxxxx>
Signed-off-by: Nikola Jelic <Nikola.Jelic@xxxxxxxxx>
---
hw/virtio/trace-events | 2 +
hw/virtio/vhost-user.c | 120 +++++++++++++++++++++++++++++++++++++++--
include/hw/virtio/vhost-user.h | 2 +-
3 files changed, 120 insertions(+), 4 deletions(-)
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 2a57edc21e..0f3c58fd78 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -30,6 +30,8 @@ vhost_user_postcopy_fault_handler_found(int i, uint64_t
region_offset, uint64_t
vhost_user_postcopy_listen(void) ""
vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int
reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region
%d"
vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t
memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t
offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB
offset:0x%"PRIx64
+vhost_user_open_region_fd(int index, int fd) "region:%d fd:%d"
+vhost_user_put_region_fds(int index, int fd) "region:%d fd:%d"
vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64
vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64
vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s +
0x%"PRIx64
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index d627351f45..932ead4eeb 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -30,6 +30,8 @@
#include "migration/postcopy-ram.h"
#include "trace.h"
#include "system/ramblock.h"
+#include "system/xen.h"
+#include "hw/xen/xen.h"
#include <sys/ioctl.h>
#include <sys/socket.h>
@@ -181,12 +183,36 @@ typedef struct VhostUserMemoryRegion {
uint64_t mmap_offset;
} VhostUserMemoryRegion;
+/*
+ * Memory region flags for VHOST_USER_PROTOCOL_F_XEN_MMAP, matching the
+ * values used by rust-vmm's vm-memory (MmapXenFlags).
+ */
+#define VHOST_USER_XEN_MMAP_FLAG_FOREIGN 0x1
+#define VHOST_USER_XEN_MMAP_FLAG_GRANT 0x2
+
+/*
+ * Extended memory region description, used when
+ * VHOST_USER_PROTOCOL_F_XEN_MMAP has been negotiated.
+ */
+typedef struct VhostUserMemoryRegionXen {
+ VhostUserMemoryRegion region;
+ uint32_t xen_mmap_flags;
+ uint32_t xen_mmap_data; /* domain id for FOREIGN/GRANT mappings */
+} VhostUserMemoryRegionXen;
+
+
typedef struct VhostUserMemory {
uint32_t nregions;
uint32_t padding;
VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
} VhostUserMemory;
+typedef struct VhostUserMemoryXen {
+ uint32_t nregions;
+ uint32_t padding;
+ VhostUserMemoryRegionXen regions[VHOST_MEMORY_BASELINE_NREGIONS];
+} VhostUserMemoryXen;
+
typedef struct VhostUserMemRegMsg {
uint64_t padding;
VhostUserMemoryRegion region;
@@ -294,6 +320,7 @@ typedef union {
struct vhost_vring_state state;
struct vhost_vring_addr addr;
VhostUserMemory memory;
+ VhostUserMemoryXen memory_xen;
VhostUserMemRegMsg mem_reg;
VhostUserLog log;
struct vhost_iotlb_msg iotlb;
@@ -594,6 +621,8 @@ static MemoryRegion *vhost_user_get_mr_data(uint64_t addr,
ram_addr_t *offset,
static bool vhost_user_gpa_addresses(struct vhost_dev *dev)
{
return vhost_user_has_protocol_feature(
+ dev, VHOST_USER_PROTOCOL_F_XEN_MMAP) ||
+ vhost_user_has_protocol_feature(
dev, VHOST_USER_PROTOCOL_F_GPA_ADDRESSES);
}
@@ -612,6 +641,23 @@ static void vhost_user_fill_msg_region(struct vhost_dev
*dev,
dst->mmap_offset = mmap_offset;
}
+/*
+ * With VHOST_USER_PROTOCOL_F_XEN_MMAP the region fds are opened by us
+ * rather than owned by the RAMBlocks, so they must be closed once the
+ * message carrying them has been sent (or on error).
+ */
+static void vhost_user_put_region_fds(struct vhost_dev *dev, int *fds,
+ size_t fd_num)
+{
+ if (!vhost_user_has_protocol_feature(dev, VHOST_USER_PROTOCOL_F_XEN_MMAP))
{
+ return;
+ }
+ for (size_t i = 0; i < fd_num; i++) {
+ trace_vhost_user_put_region_fds(i, fds[i]);
+ close(fds[i]);
+ }
+}
+
static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
struct vhost_dev *dev,
VhostUserMsg *msg,
@@ -623,13 +669,41 @@ static int vhost_user_fill_set_mem_table_msg(struct
vhost_user *u,
MemoryRegion *mr;
struct vhost_memory_region *reg;
VhostUserMemoryRegion region_buffer;
+ bool xen_mmap = vhost_user_has_protocol_feature(dev,
+ VHOST_USER_PROTOCOL_F_XEN_MMAP);
+
+ if (track_ramblocks && xen_mmap) {
+ error_report("vhost-user: postcopy is not supported under Xen");
+ return -ENOTSUP;
+ }
msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
for (i = 0; i < dev->mem->nregions; ++i) {
reg = dev->mem->regions + i;
- mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
+ if (xen_mmap) {
+ /*
+ * Under Xen the guest RAM is not mapped into our address
+ * space; the backend maps it through the Xen foreign
+ * mapping interface using the guest physical address and
+ * domain id carried in the region descriptor. The file
+ * descriptor only satisfies the one-fd-per-region
+ * requirement of the protocol: pass /dev/xen/privcmd and
+ * close it once the message has been sent.
+ */
+ mr = NULL;
+ offset = 0;
+ fd = open("/dev/xen/privcmd", O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ error_report("vhost-user: failed to open /dev/xen/privcmd:"
+ " %s", strerror(errno));
+ return -errno;
+ }
+ trace_vhost_user_open_region_fd(i, fd);
+ } else {
+ mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
+ }
if (fd > 0) {
if (track_ramblocks) {
assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
@@ -642,10 +716,21 @@ static int vhost_user_fill_set_mem_table_msg(struct
vhost_user *u,
u->region_rb[i] = mr->ram_block;
} else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
error_report("Failed preparing vhost-user memory table msg");
+ if (xen_mmap) {
+ close(fd);
+ }
return -ENOBUFS;
}
vhost_user_fill_msg_region(dev, ®ion_buffer, reg, offset);
- msg->payload.memory.regions[*fd_num] = region_buffer;
+ if (xen_mmap) {
+ msg->payload.memory_xen.regions[*fd_num].region =
region_buffer;
+ msg->payload.memory_xen.regions[*fd_num].xen_mmap_flags =
+ VHOST_USER_XEN_MMAP_FLAG_FOREIGN;
+ msg->payload.memory_xen.regions[*fd_num].xen_mmap_data =
+ xen_domid;
+ } else {
+ msg->payload.memory.regions[*fd_num] = region_buffer;
+ }
fds[(*fd_num)++] = fd;
} else if (track_ramblocks) {
u->region_rb_offset[i] = 0;
@@ -663,7 +748,11 @@ static int vhost_user_fill_set_mem_table_msg(struct
vhost_user *u,
msg->hdr.size = sizeof(msg->payload.memory.nregions);
msg->hdr.size += sizeof(msg->payload.memory.padding);
- msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
+ if (xen_mmap) {
+ msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegionXen);
+ } else {
+ msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
+ }
return 0;
}
@@ -1149,10 +1238,12 @@ static int vhost_user_set_mem_table(struct vhost_dev
*dev,
ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
false);
if (ret < 0) {
+ vhost_user_put_region_fds(dev, fds, fd_num);
return ret;
}
ret = vhost_user_write(dev, &msg, fds, fd_num);
+ vhost_user_put_region_fds(dev, fds, fd_num);
if (ret < 0) {
return ret;
}
@@ -2551,6 +2642,29 @@ static int vhost_user_backend_init(struct vhost_dev
*dev, void *opaque,
VHOST_USER_PROTOCOL_F_GET_VRING_BASE_INFLIGHT);
}
+ if (!xen_enabled()) {
+ /*
+ * Xen memory mappings only make sense when QEMU itself runs
+ * as a Xen device model.
+ */
+ protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_XEN_MMAP);
+ } else {
+ if (!virtio_has_feature(protocol_features,
+ VHOST_USER_PROTOCOL_F_XEN_MMAP)) {
+ error_setg(errp, "vhost-user backend does not support "
+ "VHOST_USER_PROTOCOL_F_XEN_MMAP, which is "
+ "required when running under Xen");
+ return -EPROTO;
+ }
+ /*
+ * The ADD/REM_MEM_REG message path has not been adapted to
+ * the Xen region format. Xen guests expose a single RAM
+ * region, so fall back to SET_MEM_TABLE.
+ */
+ protocol_features &=
+ ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
+ }
+
/* final set of protocol features */
u->protocol_features = protocol_features;
err = vhost_user_set_protocol_features(dev, u->protocol_features);
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index 06c360af18..46be9cd57c 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -30,7 +30,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
VHOST_USER_PROTOCOL_F_STATUS = 16,
- /* Feature 17 reserved for VHOST_USER_PROTOCOL_F_XEN_MMAP. */
+ VHOST_USER_PROTOCOL_F_XEN_MMAP = 17,
VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 18,
VHOST_USER_PROTOCOL_F_DEVICE_STATE = 19,
VHOST_USER_PROTOCOL_F_GET_VRING_BASE_INFLIGHT = 20,
--
2.43.0
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |