[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Minios-devel] [UNIKRAFT PATCH] plat/kvm: Introduce virtio network driver
Hello, Please find my comment inline:As a general comment, check_patch produces error with "Unrecognized email address". Please fix it. In our current design, we have the different layers involved in Virtio in a single implementation. It might be wise to split them up as it would better reflect the interactions involved. I would like to summarize the brief breakdown of the layers and the interactions involved. * The Virtio-ring is responsible for * ring meta data maintenance* API for add/getting buffer and maintaining the data structure associated with the virtio-ring. * API to enable and disable notification on the interrupt. * The Virtio-pci is responsible for: * Reading/Writing configuration * Reading Status information * Resetting the device. * Acknowledging the PCI Device and the driver if found. * Interrupt handling / Configuring the different type of interrupt. * The Virtio-net is responsible for: * Maintaining the network data buffer to be sent to the virtio-ring* Negotiate the feature of the network device eg MAC_Address, Max Virtqueue. * Allocating/Deallocating the virtqueue for the TX and RX and control queue (if necessary). * Handling event callback from the virt-ring. * API interacting with the network stack.From my perspective such a split would enable us to add other virtio device while using the rest of the infrastructure. It would also help replace the pci with mmio as necessary. On 07/24/2018 06:43 PM, Razvan Cojocaru wrote: This patch must be applied after Unikraft Network API is upstreamed.On Tue, Jul 24, 2018, 19:26 Razvan Cojocaru <razvan.cojocaru93@xxxxxxxxx <mailto:razvan.cojocaru93@xxxxxxxxx>> wrote:Initial implementation of a virtio network driver based on Unikraft Net API and virtio base driver/rings. Supports basic Net API functions such as start/stop, RX/TX packet and RX interrupt callback. Tested with lwIP, both as polling-mode driver and with interrupts. The implementation was ported from Solo5 and adapted to Unikraft APIs. Signed-off-by: Razvan Cojocaru <razvan.cojocaru93@xxxxxxxxx <mailto:razvan.cojocaru93@xxxxxxxxx>> --- plat/drivers/virtio/virtio_net.c | 546 +++++++++++++++++++++++++++++++++++++++ plat/kvm/Config.uk | 10 +- plat/kvm/Makefile.uk | 11 +- 3 files changed, 560 insertions(+), 7 deletions(-) create mode 100644 plat/drivers/virtio/virtio_net.c diff --git a/plat/drivers/virtio/virtio_net.c b/plat/drivers/virtio/virtio_net.c new file mode 100644 index 0000000..d416752 --- /dev/null +++ b/plat/drivers/virtio/virtio_net.c @@ -0,0 +1,546 @@ +/* SPDX-License-Identifier: ISC */ +/* + * Authors: Dan Williams + * Martin Lucina + * Ricardo Koller + * Razvan Cojocaru <razvan.cojocaru93@xxxxxxxxx <mailto:razvan.cojocaru93@xxxxxxxxx>> + * + * Copyright (c) 2015-2017 IBM + * Copyright (c) 2016-2017 Docker, Inc. + * Copyright (c) 2018, NEC Europe Ltd., NEC Corporation + * + * Permission to use, copy, modify, and/or distribute this software + * for any purpose with or without fee is hereby granted, provided + * that the above copyright notice and this permission notice appear + * in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE + * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Taken and adapted from solo5 virtio_net.c */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <uk/plat/lcpu.h> +#include <pci/pci_bus.h> +#include <kvm/irq.h> +#include <cpu.h> +#include <pci/virtio/virtio_ring.h> +#include <pci/virtio/virtio_pci.h> +#include <uk/wait.h> +#include <uk/netdev.h> +#include <uk/print.h> +#include <uk/assert.h> +#include <uk/essentials.h> +#if CONFIG_HAVE_SCHED +#include <uk/thread.h> +#include <uk/wait.h> +#endif + +#define VENDOR_QUMRANET_VIRTIO 0x1af4 /** Review * We should define PCI_LEGACY_NW_DEVICE_ID as 0x1000. * * The PCI_CONF_SUBSYS_NET define id within the virtio id space which is * # Network Device id - 1 * # Block Device id - 2 and so on. * * Since in our current implementation we register with the PCI bus, * we should be using the PCI_LEGACY_DEVICE_ID (0x1000) * or PCI_MODERN_DEVICE_ID(0x1041) */ +#define PCI_CONF_SUBSYS_NET 1 + +/* The feature bitmap for virtio net */ +#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ +#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ /** Review: * The virtio standard mentions the VIRTIO_NET_F_MAC as 5.It is wise to * retain it as 5. It would be a separation of the standard constant * from the mechanism to store it. * * It may be better to separate macro which store the bit map. */ +#define VIRTIO_NET_F_MAC (1 << 5) /* Host has given MAC address. */ + /** Review * What do we define as PKT_BUFFER_LEN? Does this information also * include the virtio header? A comment describing why 1526 would be * helpful. */ +#define PKT_BUFFER_LEN 1526 + +static struct uk_alloc *a; + +#define VIRTQ_RECV 0 +#define VIRTQ_XMIT 1 + +/* This header comes first in the scatter-gather list. + * If VIRTIO_F_ANY_LAYOUT is not negotiated, it must + * be the first element of the scatter-gather list. If you don't + * specify GSO or CSUM features, you can simply ignore the header. + */ +struct __packed virtio_net_hdr { +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset*/ +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ + uint8_t flags; +#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ +#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP(UFO) */ +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ /** Review * Virtio ring define data type to be used for 16-bit integer. We should * be using this data type instead of uint16_t. We should also probably * define a datatype of uint8_t for consistency. */ + uint8_t gso_type; + uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */+ uint16_t gso_size; /* Bytes to append to hdr_len per frame */ + uint16_t csum_start; /* Position tostart checksumming from */ + uint16_t csum_offset; /* Offset after that to place checksum */ +}; + /** Review: * 1) It would be better to define the virtio-net-config to define the * configuration space. This structure is described in section * 5.1.4 [2] as below: * # MAC Address: Which is always present * # Status: These enabled based on the flag VIRTIO_NET_F_STATUS * # Max Virtualqueue pairs: Enable based on the flag VIRTIO_NET_F_MQ * which is not currently supported but most likely to added. * * I would suggest to include this structure as it would clearly define * the configuration space of the virtio-net device as it is part of the * Virtio-standard. We can query the read/write configuration space * using offset defined in the structure. eg * __offsetof(struct virtio_net_config, mac); */ +struct virtio_net_device { + struct pci_device *dev; + struct uk_netdev netdev; + uint16_t pci_base; /* base in PCI config space */ + struct virtq recvq; + uint8_t recv_intr_enabled; + struct virtq xmitq; /** Review: * 1) Why would we need create thread within virtio driver for the * recieve queue. It would be better manage these thread within * libuknet. The virtio-net device would be responsible for handling * callback from the virtqueue and signalling the network stack * regarding the receipt of a packet. * * 2) We also need to protect the usage of the thread and waitq with * the respective libraries. */ + struct uk_thread *thread; + struct uk_waitq wq; +}; + /** Review: * The interrupt handled in this handler is that of the virtio-pci * device. * * When we plan to restructure the virtio-layer this function * will have to shifted to the virtio-pci layer. This would create a * clear distinction between the virtio pci device and the virtio device * ie network device. * * Please refer to the top for more detailed description */ +static int virtio_net_irq_handle(void *arg) +{ + struct virtio_net_device *d = (struct virtio_net_device *) arg; + uint8_t isr_status; + + if (unlikely(d->netdev.data->state != UK_NETDEV_RUNNING)) + return 0; + + isr_status = inb(d->pci_base + VIRTIO_PCI_ISR); + if (isr_status & VIRTIO_PCI_ISR_HAS_INTR){ + uk_waitq_wake_up(&d->wq); + return 1; + } + return 0; +} + +static void recv_setup(struct virtio_net_device *d) +{ + uint16_t mask = (uint16_t)(d->recvq.num- 1); + + do { /** Review: * We are combining the network data buffer within the virtio-ring * implementation in the io_buffer. I think it is wise to decouple * information and move toward a data structure like a * scatter/gather list. * * The virtq ring structure contains internal information like the * next_avail to manage the virt-ring. It should be directly * accessed from the virtio-net device. We */ + struct io_buffer + *buf; /* header and data in a single descriptor */ + buf = &d->recvq.bufs[d->recvq.next_avail & mask]; /** Review: * Why do we need to cleanup the memory? Virtio provides * us with length field to determine the size of the received * buffer? */ + memset(buf->data, 0, PKT_BUFFER_LEN); + buf->len = PKT_BUFFER_LEN; + buf->extra_flags = VIRTQ_DESC_F_WRITE; /** Review: * Why are we asserting for error? */ + UK_ASSERT(virtq_add_descriptor_chain( + &d->recvq, d->recvq.next_avail & mask, 1) + == 0); + } while ((d->recvq.next_avail & mask) != 0); + /** Review: * This should belong to the virtio-pci device. We must also check if * device want to suppress the notification. * * Please refer to the top for more detailed description */ + outw(d->pci_base + VIRTIO_PCI_QUEUE_NOTIFY, VIRTQ_RECV); +} + +static int virtio_netdev_xmit(struct uk_netdev *n, + uint16_t queue_id __unused, struct uk_netdev_mbuf *mbuf) +{ + struct virtio_net_device *d; + struct io_buffer *head_buf, *data_buf; + uint16_t mask; + uint16_t head; + int r; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + mask = (uint16_t)(d->xmitq.num - 1); + /** Review: * Is it be possible for the packet length could be > 1526 * when we set the MTU on initialization to the PKT_BUFFER_LEN. * If it is an error with the NW stack we might as well assert for it. */ + if (unlikely(mbuf->len > PKT_BUFFER_LEN)) + return -EINVAL; + /** Review: * 1) We are reclaiming the buffers freed by the device * but we must careful here as the user of the stack could possibly * enable the interrupt on the virtqueue resulting in notification * on completion of data transfer to host. The interrupt user should not * clean up the descriptor then or we will have to protect these data * structure using locks. * * It will also be better to provide a helper function which * perform the cleanup. * free_old_tx_desc(struct virtio_net_device *) * * * 2) In the below piece of code, we are manipulating the internal ring * structures which are critical. I would prefer if we * provide helper functions perform the manipulating of the * num_avail, last_used and next_avail. Something like * virtq_getbuf(struct virtq *); * * 3) In this we are using idx, which is update from the host. I * think, we require a rmb to while working with the available idx as * the host is also writing to it. */ + /* Consume used descriptors from all the previous tx'es. */ + for (; d->xmitq.last_used != d->xmitq.used->idx; d->xmitq.last_used++) + d->xmitq.num_avail += 2; /* 2 descriptors per chain */ + /** Review: * We are using the internal virtq data structure (next_avail). * I would prefer if we provide helper function in virtio_ring * to get its buf. In this case we * could have something like * virtq_fetchAvailBuf(struct virtq *, int tot_buf_cnt) */ + /* next_avail is incremented by virtq_add_descriptor_chain below. */ + head = d->xmitq.next_avail & mask; + head_buf = &d->xmitq.bufs[head]; + data_buf = &d->xmitq.bufs[(head + 1) & mask]; + + /* The header buf */ + memset(head_buf->data, 0, sizeof(struct virtio_net_hdr)); + head_buf->len = sizeof(struct virtio_net_hdr); + head_buf->extra_flags = 0; + + /* The data buf */ + memcpy(data_buf->data, mbuf->payload, mbuf->len); + data_buf->len = mbuf->len; /** Review: * 1) According to spec[2] section 5.1.6.2, if the virtio checksum * feature was not negotiated the gso_type in the virtio_nethdr should * be set to VIRTIO_NET_HDR_GSO_NONE. Since we do not negotiate for the * checksum feature it would be wise explicitly set the value to * VIRTIO_NET_HDR_GSO_NONE. * * This next comment probably affect other patches * enabling networking on Unikraft * 2) Since the netdevice does not support checksum * offload, we must enforce higher layer of the network stack to * perform checksum. Currently in our lwip implementation we provide * it as a configuration option. It might we wise to generate an * compilation error if the user turn on the checksum offload * in the network stack. * It is also possible on the receive side of lwip to ignore checksums * and on sending ot should calculate one. THe problem is * that we may receive packets from another VM that set CSO. However, * since we never left the physical machine, the checksum is * never calculated and on receive the flag is set on the virtio * buffer. This means that the packet has a wrong checksum although * everything is working correctly. Packets with wrong * checksums coming from external should anyway dropped already by the * physical NIC on Dom0/Hypervsior host, so these packets will * never arrive at the VM. * * 3)We may have to think about it in the future. * If we enable the checksum offload feature, we might require * information from the network stack regarding offset and * length within the packet for performing the checksum. We might * need a way to transfer this information to the network device. */ + data_buf->extra_flags = 0; + /** Review: * If the virtq_add_descriptor throw as error we are still notifying * the device. */ + r = virtq_add_descriptor_chain(&d->xmitq, head, 2); + /** Review: * We are notifying the device without verifying if the device has set * the flag to not notify. Move this functionality with the virtio-ring. * Since this would be common functionality across viritio-devices. * * Please refer to the top for more detailed description */ + outw(d->pci_base + VIRTIO_PCI_QUEUE_NOTIFY, VIRTQ_XMIT); + + return r; +} + +/* Get the data from the next_avail (top-most) receive buffer/descriptor in + * the available ring. + */ +static uint8_t *virtio_net_recv_ring_get(struct virtio_net_device *d, + uint16_t *size) +{ + uint16_t mask; + struct virtq_used_elem *e; + struct io_buffer *buf; + uint8_t *pkt; + + mask = (uint16_t)(d->recvq.num - 1); + /** Review: * Prefer for the virtio-ring layer to provide an API to disable/enable * virtqueue interrupt. This would * make it easier when we move from the flag to event based on the * VIRTIO_F_EVENT_IDX flag. * * Please refer to the top for more detailed description */ + d->recvq.avail->flags |= VIRTQ_AVAIL_F_NO_INTERRUPT; + /** Review: * 1) In spec[2] section 3.2.2, provides a cleaner implementation of * retrieving the buffer from the virtio-ring. * In the current implementation read barrier while reading buffer and * idx are missing. * * 2) The function remove the buffers from the ring should be moved to * the virtio-ring implementation as it is responsible for maintaining * the counter such last_used. * virtq_getbuf(struct virtq *); * could also be used to get the buffer for receiving. * * 3) In spec[2] section 5.1.6.6, it states this * * " When using legacy interfaces, transitional drivers which have not * negotiated VIRTIO_F_ANY_LAYOUT MUST use a single descriptor for the * struct virtio_net_hdr on both transmit and receive, with the network * data in the following descriptors." * * For the transmit queue we are following this but for receive queue we * are not doing this. * * 4) The used buffer is shared between the host device and the guest * driver, we might need read barrier before reading from these buffer. */ + /* The device increments used->idx whenever it uses a packet (i.e. it + * put a packet on our receive queue) andif it's ahead of last_used it + * means that we have a pending packet. + */ + if (d->recvq.last_used == d->recvq.used->idx) + return NULL; + + e = &(d->recvq.used->ring[d->recvq.last_used & mask]); + + if (e->len == 0) { + if (d->recv_intr_enabled) /** Review: * Prefer for the virtio-ring layer to provide an API to disable/enable * virtqueue interrupt. This would * make it easier when we move from the flag to event based on the * VIRTIO_F_EVENT_IDX flag. * * Please refer to the top for more detailed description */ + d->recvq.avail->flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT; + *size = 0; + return NULL; + } + + buf = (struct io_buffer *)d->recvq.desc[e->id].addr; + buf->len = e->len; + + /* Remove the virtio_net_hdr */ + *size = buf->len - sizeof(struct virtio_net_hdr); + pkt = buf->data + sizeof(struct virtio_net_hdr); + /** Review: * I prefer doing this check before doing any math - this is much * clearer to understand the error case * UK_ASSERT(e->len > sizeof(struct virtio_net_hdr) && * e->len <= MAX_BUFLEN) */ + UK_ASSERT(*size <= PKT_BUFFER_LEN); + + return pkt; +} + +/* Return the next_avail (top-most) receive buffer/descriptor to the available + * ring. + */ +static void virtio_net_recv_ring_release(struct virtio_net_device *d) +{ + uint16_t mask; + /** Review * * The function remove the buffers from the ring should be moved to * the virtio-ring implementation as it is responsible for maintaining * the counter such last_used. * * virtq_getbuf(struct virtq *); * could also be used to get the buffer for receiving. * * Please refer to the top for more detailed description */ + /* Consume the recently used descriptor. */ + d->recvq.last_used++; + d->recvq.num_avail++; + + mask = (uint16_t)(d->recvq.num - 1); + d->recvq.bufs[d->recvq.next_avail & mask].len = PKT_BUFFER_LEN; + d->recvq.bufs[d->recvq.next_avail & mask].extra_flags = + VIRTQ_DESC_F_WRITE; + /** Review: * Asserting for an error. */ + /* This sets the returned descriptor to be ready for incoming + * packets, and advances the next_avail index. + */ + UK_ASSERT( + virtq_add_descriptor_chain(&d->recvq, d->recvq.next_avail & mask, 1) + == 0); + outw(d->pci_base + VIRTIO_PCI_QUEUE_NOTIFY, VIRTQ_RECV); + + if (d->recv_intr_enabled) + d->recvq.avail->flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT; +} + +static int virtio_netdev_recv(struct uk_netdev *n, + uint16_t queue_id __unused, struct uk_netdev_mbuf *mbuf) +{ + struct virtio_net_device *d; + uint8_t *pkt; + uint16_t pktlen = 0; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + + pkt = virtio_net_recv_ring_get(d, &pktlen); + + if (pkt) { /** Review: * While setting the pktlen should it not be MIN(mbuf->len, * pktlen). If buffer allocated is smaller then it we are * writting to someother location. * * An UK_ASSERT to check the length of mbuf. */ + /* also, it'sclearly not zero copy */ + memcpy(mbuf->payload, pkt, pktlen); + mbuf->len =pktlen; + virtio_net_recv_ring_release(d); + } + + return pktlen; +} + +static int virtio_netdev_rx_queue_setup(struct uk_netdev *n, + uint16_t queue_id __unused, + const struct uk_netdev_rxqueue_conf *conf __unused) +{ + struct virtio_net_device *d; + int err; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + /** Review: * 1) In section 3.1.1 [2] describe the device initialization to be * performed. * We initializing the virtqueue after setting the DRIVER_OK. * Prefer if we can stick with the specification. * * 2) We could also create the separate function * virtio_netdev_queue_alloc(struct virtio_net_device *) to perform the * initalization. As we are using the same step to initialize the * virtqueue_tx and virtqueue_rx as well control queues as and when * enabled. */ + /* + * Perform device-specific setup, including discovery of virtqueues + * for the device, optional per-bus setup, reading and possibly writing + * the device's virtio configuration space, and population of + * virtqueues. + */ + err = virtq_rings_init(&d->recvq, d->pci_base, VIRTQ_RECV, a); + if (err) + goto err_out; + /** Review: * Prefer we move the io_buffer inside the virtio-net device. As it * would mean data buffer are managed by the network driver * * Please refer to the top for more detailed description */ + d->recvq.bufs = uk_calloc(a, d->recvq.num, sizeof(struct io_buffer)); + if (!d->recvq.bufs) { + err = -ENOMEM; + goto err_freeq; + } + + recv_setup(d); + + return 0; + +err_freeq: + virtq_rings_fini(&d->recvq, d->pci_base, VIRTQ_RECV, a); +err_out: + return err; +} + +static int virtio_netdev_tx_queue_setup(struct uk_netdev *n, + uint16_t queue_id __unused, + const struct uk_netdev_txqueue_conf *conf __unused) +{ + struct virtio_net_device *d; + int err; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + /** Review: * 1) In section 3.1.1 [2] describe the device initialization to be * performed. * We initializing the virtqueue after setting the DRIVER_OK. * Prefer if we can stick with the specification. * * 2) We could also create the separate function * virtio_netdev_queue_alloc(struct virtio_net_device *) to perform the * initalization. As we are using the same step to initialize the * virtqueue_tx and virtqueue_rx as well control queues. */ + err = virtq_rings_init(&d->xmitq, d->pci_base, VIRTQ_XMIT, a); + if (err) + goto err_out; + + d->xmitq.bufs = uk_calloc(a, d->xmitq.num, sizeof(struct io_buffer)); + UK_ASSERT(d->recvq.bufs != NULL); + if (!d->xmitq.bufs) { + err = -ENOMEM; + goto err_freeq; + } + + return 0; + +err_freeq: + virtq_rings_fini(&d->xmitq, d->pci_base, VIRTQ_XMIT, a); +err_out: + return err; +} + +static int virtio_netdev_configure(struct uk_netdev *n, + __unused const struct uk_netdev_conf *conf) +{ + struct virtio_net_device *d; + uint32_t host_features, guest_features; + struct uk_hwaddr mac; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + /** Review: * This should be assigned while adding the device. */ + d->pci_base = d->dev->base; + + /* + * Set the ACKNOWLEDGE status bit: the guest OS has notice the + * device. + * Set the DRIVER status bit: the guest OS knows how to drive the + * device. + */ /** Review: * I have a some of comments on the following piece of the code: * * 1) The acknowledgment of the device should be done while probing the * device. Why are we postponing it until we are configuring * the device? * * 2) Instead of calling the outb directly, it might be better * to have generic function, like iowrite8 to * perform the write operation. This is not specific to this patch, * as we have used the inb/outb even in virtio-ring as well as pci. * Need to look at this part when we change that. * * 3) The below code could also be moved as a part * of header file, as we would use it across virtio device drivers. * * 4) If the configuration can change should we not also reset the * device and reinitialize the device from scratch. Resetting the * may not be necessary during initialization. If we are * reconfiguring the device we will have to reset the device and * renegotiate the features. */ + outb(d->pci_base + VIRTIO_PCI_STATUS, VIRTIO_PCI_STATUS_ACK); + outb(d->pci_base + VIRTIO_PCI_STATUS, VIRTIO_PCI_STATUS_DRIVER); + + /* + * Read device feature bits, and write the subset of feature bits + * understood by the OS and driver to thedevice. During this step the + * driver MAY read (but MUST NOT write) the device-specific + * configuration fields to check that it can support the device before + * accepting it. + */ + host_features = inl(d->pci_base + VIRTIO_PCI_HOST_FEATURES); /** Review: * 1) Why are we asserting for the VIRTIO_NET_F_MAC, as it is a feature * set? I think would be better to throw an error and uk_printd() * message about the error. * * 2) According the spec [2] 5.1.5, if the MAC is not configured local * mac "would" be generated by the guest driver. Since we are generating * the a random MAC address we might add comment describing that we * report an error instead of generating a local MAC. */ + UK_ASSERT(host_features & VIRTIO_NET_F_MAC); + + /* only negotiate that the mac was set for now */ + guest_features = VIRTIO_NET_F_MAC; + outl(d->pci_base + VIRTIO_PCI_GUEST_FEATURES, guest_features); + /** Review: * According to Virtio specification[2], section 2.3.1. Config fields * greater than 32-bits cannot be atomically read. We may need to * reconsider providing generic read/write function for all these * virtio device in a separate header file which could be reused across * different virtio devices. * * Legacy Virtio PCI Device: * According to specification, we would have to the configuration * address until the same value are returned. * * 2) Why copying it two times? We could use n->data->mac_addr directly */ + for (int i = 0; i < UK_HWADDR_LEN; i++) + mac.addr_bytes[i] = + inb(d->pci_base + VIRTIO_PCI_CONFIG_OFF + i); + memcpy(&n->data->mac_addr, &mac, sizeof(struct uk_hwaddr)); + + ukplat_irq_register(d->dev->irq, virtio_net_irq_handle, d); + /** Review: * 1) Why are setting the network state within the driver? Shouldn't the * libuknet set the state? * * According to the specification[2] section 3.1, the driver of flag is * set after the virtio_ring is configured. We will have to move it as a * part of starting the net device. * * Please refer to the top for more detailed description */ + /* + * Set the DRIVER_OK status bit. At this point the device is "live". + */ + outb(d->pci_base + VIRTIO_PCI_STATUS, VIRTIO_PCI_STATUS_DRIVER_OK); + + d->netdev.data->state = UK_NETDEV_CONFIGURED; + + uk_printd(DLVL_INFO, + "PCI:%02x:%02x: Configured (features=0x%x, irq=%lu)\n", + d->dev->addr.bus, d->dev->addr.devid, host_features, d->dev->irq); + + return 0; +} + /** Review: * Why is this defined as a globally visible function when we are * passing it as a function pointer. */ +int virtio_net_enable_rx_intr(struct uk_netdev *n, + uint16_t rx_queue_id __unused) +{ + struct virtio_net_device *d; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + d->recv_intr_enabled = 1; /** Review: * Prefer for the virtio-ring layer to provide an API to enable * virtqueue interrupt. This would * make it easier when we move from the flag to event based on the * VIRTIO_F_EVENT_IDX flag. * * Please refer to the top for more detailed description */ + d->recvq.avail->flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT; + return 0; +} + /** Review: * Why is this defined as a globally visible function when we are * passing it as a function pointer. */ +int virtio_net_disable_rx_intr(struct uk_netdev *n, + uint16_t rx_queue_id __unused) +{ + struct virtio_net_device *d; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + d->recv_intr_enabled = 0; /** Review: * Prefer for the virtio-ring layer to provide an API to disable * virtqueue interrupt. This would * make it easier when we move from the flag to event based on the * VIRTIO_F_EVENT_IDX flag. * * Please refer to the top for more detailed description */ + d->recvq.avail->flags |= VIRTQ_AVAIL_F_NO_INTERRUPT; + return 0; +} + +static void virtio_net_thread(void *arg) +{ + struct virtio_net_device *d = arg; + struct uk_netdev *n; + + UK_ASSERT(d != NULL); + n = &d->netdev; + + while (n->data->state == UK_NETDEV_RUNNING) { /** Review: * 1) The used ring idx is a data structure between the host * device and the guest driver. I think we need mb. * * 2) This function should belong to the libuknet. As the * interrupt handler wakeup a thread in the network stack to * to perform receive operation. */ + uk_waitq_wait_event(&d->wq, + d->recvq.last_used != d->recvq.used->idx); /** Review: * if rx_cb != NULL before calling it */ + n->rx_cb(n, 0); + } +} + /** Review: * Why is this defined as a globally visible function when we are * passing it as a function pointer. */ +int virtio_net_start(struct uk_netdev *n) +{ + struct virtio_net_device *d; + char buf[UK_NETDEV_NAME_MAX_LEN]; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + /** Review: * use snprintf instead of sprintf. safe coding practise */ + sprintf(buf, "virtio-net%d", n->data->id); /** Review: * The uk_netdev_name_set return error -ENOTSUP if the name config * flag are not set. Maybe, it would be wise to make the * uk_netdev_name_set as a function that return void. */ + uk_netdev_name_set(n, buf, (uint16_t)strlen(buf)); + uk_printd(DLVL_INFO, "%s started\n", buf); + /** Review: * 1) Why are setting the network state within the driver? Shouldn't the * libuknet set the state? */ + d->netdev.data->state = UK_NETDEV_RUNNING; + + /* Start the thread that handles packet RX callbacks */ + if (n->rx_cb != NULL) { /** Review: * Why are we creating a thread within the virtio driver? * libuknet is the library which perform operation on the network * packet? * Should it not be responsible for creating the receiver thread and * scheduling the network stack to receive packets? */ + uk_waitq_init(&d->wq); + d->thread =uk_thread_create(buf, virtio_net_thread, d); + if (d->thread== NULL) { + uk_printd(DLVL_ERR, "Error creating %s thread.", buf); + return -ENOMEM; + } + } + + /* + * By default, interrupts are disabled and it is up to the user or + * network stack to manually enable them with a call to + * enable_tx|rx_intr() + */ /** Review: * Prefer for the virtio-ring layer to provide an API to disable * virtqueue interrupt. This would * make it easier when we move from the flag to event based on the * VIRTIO_F_EVENT_IDX flag. * * Please refer to the top for more detailed description */ + d->recv_intr_enabled = 0; + d->recvq.avail->flags |= VIRTQ_AVAIL_F_NO_INTERRUPT; + d->xmitq.avail->flags |= VIRTQ_AVAIL_F_NO_INTERRUPT; + + return 0; +} + /** Review: * Why is this defined as a globally visible function when we are * passing it as a function pointer. */ +void virtio_net_stop(struct uk_netdev *n) +{ + struct virtio_net_device *d; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + /** Review: * 1) I would prefer if we call helper function while manipulating the * internal data of uk_net_dev. * * 2) What happenes to buffers submitted to virtio? Do we wait to flush * all the buffer? */ + d->netdev.data->state = UK_NETDEV_CONFIGURED; +} + +void virtio_net_close(struct uk_netdev *n) +{ + struct virtio_net_device *d; + + UK_ASSERT(n != NULL); /** Review: * Prefer if we had a helper function/macro to return the * virtio_net_device instead of invoking the __containerof directly. */ + d = __containerof(n, struct virtio_net_device, netdev); + /** Review: * 1) I would prefer if we call helper function while manipulating the * internal data of uk_net_dev. * * 2) What happenes to buffers submitted to virtio? Do we wait to flush * all the buffer? */ + d->netdev.data->state = UK_NETDEV_UNCONFIGURED; +} + +static const struct uk_netdev_ops virtio_netdev_ops = { + .dev_configure = virtio_netdev_configure, + .rx_queue_setup = virtio_netdev_rx_queue_setup, + .tx_queue_setup = virtio_netdev_tx_queue_setup, + .dev_start = virtio_net_start, + .dev_stop = virtio_net_stop, + .dev_close = virtio_net_close, + .rx_enable_intr = virtio_net_enable_rx_intr, + .rx_disable_intr = virtio_net_disable_rx_intr, +}; + +static int virtio_net_add_dev(struct pci_device *dev) +{ + struct virtio_net_device *d; + int err; + + UK_ASSERT(dev != NULL); + + d = uk_malloc(a, sizeof(*d)); + if (!d) { + err = -ENOMEM; + goto err_out; + } + + d->dev = dev; + + /* register netdev */ + d->netdev.rx_pkt = virtio_netdev_recv; + d->netdev.tx_pkt = virtio_netdev_xmit; + d->netdev.dev_ops = &virtio_netdev_ops; + /** Review: * Do we want to initialize an internal data structure here? Should the * libuknet provide a initializer function. * * Error Check missing for uk_malloc. * * This function should do the least minimum to let libuknetdev know * that there is a network device. */ + d->netdev.data = uk_malloc(a, sizeof(struct uk_netdev_data)); + d->netdev.data->state = UK_NETDEV_UNCONFIGURED; /** Review: * Why are we setting the Network MTU to 1526? If we using the Ethernet * should we not mention it as 1500. As the Ethernet packet would be * 1500 + 18(Ethernet header). */ + d->netdev.data->mtu = PKT_BUFFER_LEN; + + uk_netdev_register(&d->netdev); + + return 0; + +err_out: + return err; +} + +static int virtio_net_drv_init(struct uk_alloc *drv_allocator) +{ + /* driver initialization */ + if (!drv_allocator) + return -EINVAL; + + a = drv_allocator; + return 0; +} + +static const struct pci_device_id pci_id_map[] = { /** Review: * 1) Prefer to use the macro provided in pci * PCI_DEVICE_ID(VENDOR_QUMRANET_VIRTIO, PCI_ANY_ID) * * 2) If we are specializing this for Network device, should we not use * device id to be Legacy network device("0x1000")? * */ + {PCI_CLASS_ANY_ID, VENDOR_QUMRANET_VIRTIO, PCI_ANY_ID, PCI_ANY_ID, + PCI_CONF_SUBSYS_NET}, + {PCI_ANY_DEVICE_ID}, +}; + +static struct pci_driver virtio_net_drv = { + .device_ids = pci_id_map, + .init = virtio_net_drv_init, + .add_dev = virtio_net_add_dev +}; + +PCI_REGISTER_DRIVER(&virtio_net_drv); diff --git a/plat/kvm/Config.uk b/plat/kvm/Config.uk index 118954d..4526925 100644 --- a/plat/kvm/Config.uk +++ b/plat/kvm/Config.uk @@ -19,10 +19,14 @@ config KVM_PCI PCI bus driver for probing and operating PCI devices if (KVM_PCI) /** Review: * I would not bundle virtio_net and virtio_ring as they have separate * reason to exists. Figure 3, in [1] provides a high level overview * on the different layers within the virtio. I believe * it would be better if we stick with it unless we have very good * reason to avoid it. */ -config KVM_PCI_VIRTIO - bool "Virtio Ring" +menu "Virtio" +config KVM_PCI_VIRTIONET + bool "Virtio Networking" default n + select LIBUKNETDEV help - Virtual queues to traverse host and guest transition + Paravirtualized networking driver + +endmenu endif endif diff --git a/plat/kvm/Makefile.uk b/plat/kvm/Makefile.uk index e379c83..d761588 100644 --- a/plat/kvm/Makefile.uk +++ b/plat/kvm/Makefile.uk @@ -8,7 +8,7 @@ $(eval $(call addplat_s,kvm,$(CONFIG_PLAT_KVM))) ## $(eval $(call addplatlib,kvm,libkvmplat)) $(eval $(call addplatlib_s,kvm,libkvmpci,$(CONFIG_KVM_PCI))) -$(eval $(call addplatlib_s,kvm,libkvmpcivirtio,$(CONFIG_KVM_PCI_VIRTIO))) +$(eval $(call addplatlib_s,kvm,libkvmpcivirtionet,$(CONFIG_KVM_PCI_VIRTIONET))) ## ## Platform library definitions @@ -52,6 +52,9 @@ LIBKVMPCI_SRCS-y += $(UK_PLAT_COMMON_BASE)/pci_bus.c|common ## ## Virtio library definitions ## -LIBKVMPCIVIRTIO_ASINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include -LIBKVMPCIVIRTIO_CINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include -LIBKVMPCIVIRTIO_SRCS-y += $(UK_PLAT_DRIVERS_BASE)/virtio/virtio_ring.c +LIBKVMPCIVIRTIONET_ASINCLUDES-y += -I$(LIBKVMPLAT_BASE)/include +LIBKVMPCIVIRTIONET_CINCLUDES-y += -I$(LIBKVMPLAT_BASE)/include +LIBKVMPCIVIRTIONET_ASINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include +LIBKVMPCIVIRTIONET_CINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include +LIBKVMPCIVIRTIONET_SRCS-y += $(UK_PLAT_DRIVERS_BASE)/virtio/virtio_ring.c +LIBKVMPCIVIRTIONET_SRCS-y += $(UK_PLAT_DRIVERS_BASE)/virtio/virtio_net.c-- 2.7.4_______________________________________________ Minios-devel mailing list Minios-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/minios-devel [1] https://www.ibm.com/developerworks/library/l-virtio/ [2] http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html Thanks & Regards Sharan _______________________________________________ Minios-devel mailing list Minios-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/minios-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |