Re: [Minios-devel] [UNIKRAFT PATCH v2 7/7] plat/drivers: Receive and Transmit operations



On 19.10.18 15:40, Sharan Santhanam wrote:
This patch adds the transmit and receive operations for the
virtio-net device. We extend the scatter-gather list to operate
on netbufs.

Signed-off-by: Sharan Santhanam <sharan.santhanam@xxxxxxxxx>
Signed-off-by: Razvan Cojocaru <razvan.cojocaru93@xxxxxxxxx>
---
  lib/uksglist/exportsyms.uk       |   1 +
  lib/uksglist/include/uk/sglist.h |  18 +++
  lib/uksglist/sglist.c            |  25 ++++
  plat/drivers/virtio/virtio_net.c | 292 ++++++++++++++++++++++++++++++++++++++-
  4 files changed, 333 insertions(+), 3 deletions(-)

diff --git a/lib/uksglist/exportsyms.uk b/lib/uksglist/exportsyms.uk
index a1e9b37..f8c60f7 100644
--- a/lib/uksglist/exportsyms.uk
+++ b/lib/uksglist/exportsyms.uk
@@ -9,3 +9,4 @@ uk_sglist_length
  uk_sglist_split
  uk_sglist_join
  uk_sglist_slice
+uk_sglist_append_netbuf
diff --git a/lib/uksglist/include/uk/sglist.h b/lib/uksglist/include/uk/sglist.h
index a89b10d..db000c0 100644
--- a/lib/uksglist/include/uk/sglist.h
+++ b/lib/uksglist/include/uk/sglist.h
@@ -44,12 +44,16 @@
  extern "C" {
  #endif /* __cplusplus */
+#include <uk/config.h>
  #include <stdint.h>
  #include <uk/arch/types.h>
  #include <uk/refcount.h>
  #ifdef CONFIG_LIBUKALLOC
  #include <uk/alloc.h>
  #endif /* CONFIG_LIBUKALLOC */
+#ifdef CONFIG_LIBUKNETDEV
+#include <uk/netbuf.h>
+#endif /* CONFIG_LIBUKNETDEV */
struct uk_sglist_seg {
        __phys_addr  ss_paddr; /* Physical address */
@@ -265,6 +269,20 @@ int uk_sglist_slice(struct uk_sglist *original, struct uk_sglist **slice,
                        struct uk_alloc *a, size_t offset, size_t length);
  #endif /* CONFIG_LIBUKALLOC */
+#ifdef CONFIG_LIBUKNETDEV
+/**
+ * The function creates a scatter-gather list from the netbuf.
+ * @param sg
+ *     A reference to the scatter-gather list.
+ * @param netbuf
+ *     A reference to the netbuf.
+ * @return
+ *     0, on successful creation of the scatter-gather list
+ *     -EINVAL, invalid sg list.
+ */
+int uk_sglist_append_netbuf(struct uk_sglist *sg, struct uk_netbuf *netbuf);
+#endif /* CONFIG_LIBUKNETDEV */
+
  #ifdef __cplusplus
  }
  #endif /* __cplusplus */
diff --git a/lib/uksglist/sglist.c b/lib/uksglist/sglist.c
index 0ec2105..50787b3 100644
--- a/lib/uksglist/sglist.c
+++ b/lib/uksglist/sglist.c
@@ -531,3 +531,28 @@ int uk_sglist_slice(struct uk_sglist *original, struct uk_sglist **slice,
        return 0;
  }
  #endif /* CONFIG_UKALLOC */
+
+#ifdef CONFIG_LIBUKNETDEV
+int uk_sglist_append_netbuf(struct uk_sglist *sg, struct uk_netbuf *netbuf)
+{
+       struct sgsave save;
+       struct uk_netbuf *nb;
+       int error;
+
+       if (sg->sg_maxseg == 0)
+               return -EINVAL;
+
+       error = 0;
+       SGLIST_SAVE(sg, save);
+       for (nb = netbuf; nb != NULL; nb = nb->next) {

You could use the UK_NETBUF_CHAIN_FOREACH macro instead (see netbuf.h).
It would make your implementation independent of the way netbufs are chained.
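
For instance (untested sketch):

  UK_NETBUF_CHAIN_FOREACH(nb, netbuf) {
    if (nb->len > 0) {
      ...
    }
  }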

+               if (nb->len > 0) {

Probably you could add a 'likely' to this condition check as an optimization, e.g.:
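
  if (likely(nb->len > 0)) {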

+                       error = uk_sglist_append(sg, nb->data, nb->len);
+                       if (error) {

Probably you could add an 'unlikely' to the error case as an optimization, e.g.:
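
  if (unlikely(error)) {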

+                               SGLIST_RESTORE(sg, save);
+                               return error;
+                       }
+               }
+       }
+       return 0;
+}
+#endif /* CONFIG_LIBUKNETDEV */
diff --git a/plat/drivers/virtio/virtio_net.c b/plat/drivers/virtio/virtio_net.c
index e7581f1..e873eb9 100644
--- a/plat/drivers/virtio/virtio_net.c
+++ b/plat/drivers/virtio/virtio_net.c
@@ -79,6 +79,17 @@ typedef enum {
  } virtq_type_t;
/**
+ * When mergeable buffers are not negotiated, the virtio_net_rx_hdr structure
+ * below is placed at the beginning of the netbuf data. Use 4 bytes of padding
+ * to keep the VirtIO header and the data non-contiguous and to keep the
+ * frame's payload 4-byte aligned.
+ */
+struct virtio_net_rx_hdr {
+       struct virtio_net_hdr vhdr;
+       char            vrh_pad[VTNET_RX_HEADER_PAD];
+};
+
+/**
   * @internal structure to represent the transmit queue.
   */
  struct uk_netdev_tx_queue {
@@ -180,6 +191,7 @@ static int virtio_net_rx_intr_disable(struct uk_netdev *n,
                                      struct uk_netdev_rx_queue *queue);
  static int virtio_net_rx_intr_enable(struct uk_netdev *n,
                                     struct uk_netdev_rx_queue *queue);
+static void virtio_netdev_xmit_free(struct uk_netdev_tx_queue *txq);
  static int virtio_netdev_xmit(struct uk_netdev *dev,
                              struct uk_netdev_tx_queue *queue,
                              struct uk_netbuf *pkt);
@@ -195,7 +207,13 @@ static int virtio_netdev_rxq_info_get(struct uk_netdev *dev, __u16 queue_id,
                                      struct uk_netdev_queue_info *qinfo);
  static int virtio_netdev_txq_info_get(struct uk_netdev *dev, __u16 queue_id,
                                      struct uk_netdev_queue_info *qinfo);
+static int virtio_netdev_rxq_dequeue(struct uk_netdev_rx_queue *rxq,
+                                    struct uk_netbuf **netbuf);
+static int virtio_netdev_rxq_enqueue(struct uk_netdev_rx_queue *rxq,
+                                    struct uk_netbuf *netbuf);
  static int virtio_netdev_recv_done(struct virtqueue *vq, void *priv);
+static int virtio_netdev_rx_fillup(struct uk_netdev_rx_queue *rxq,
+               struct uk_netbuf **netbuf, __u16 *count);
/**
   * Static global constants
@@ -223,30 +241,298 @@ static int virtio_netdev_recv_done(struct virtqueue *vq, void *priv)
        return 1;
  }
+static void virtio_netdev_xmit_free(struct uk_netdev_tx_queue *txq)
+{
+       struct uk_netbuf *pkt = NULL;
+       int cnt = 0;
+
+       pkt = (struct uk_netbuf *) virtqueue_buffer_dequeue(txq->vq, NULL);
+       while (pkt) {
+               /**
+                * Release the buffer back to the netbuf layer. The netbuf can
+                * use its destructor to inform the stack that the memory was
+                * freed.
+                */
+               uk_netbuf_free(pkt);
+               cnt++;
+               pkt = (struct uk_netbuf *)
+                       virtqueue_buffer_dequeue(txq->vq, NULL);
+       }

This type of loop would avoid the duplicated virtqueue_buffer_dequeue() call. What do you think?

for (;;) {
  pkt = (struct uk_netbuf *) virtqueue_buffer_dequeue(txq->vq, NULL);
  if (!pkt)
    break;
  cnt++;
  uk_netbuf_free(pkt);
}

+       uk_pr_debug("Free %"__PRIu16" descriptors\n", cnt);

I would say "%"__PRIu16" descriptors free'd\n"

+}
+
+static int virtio_netdev_rx_fillup(struct uk_netdev_rx_queue *rxq,
+               struct uk_netbuf **netbuf, __u16 *count)
+{
+       int rc = 0;
+       __u16 i = 0;
+       __u16 cnt = 0;
+
+       /**
+        * A fixed amount of memory is allocated to each received buffer. In
+        * our case, since we don't support jumbo frames or LRO yet, we require
+        * that the buffer fed to the ring descriptor is at least
+        * ethernet MTU + virtio net header.
+        */
+       for (i = 0; i < *count; i++) {
+               rc = virtio_netdev_rxq_enqueue(rxq, netbuf[i]);
+               if (rc == -ENOSPC) {
+                       uk_pr_debug(
+                               "No more room available to add descriptors\n");
+                       rc = 0;
+                       break;
+               } else if (unlikely(rc < 0)) {
+                       uk_pr_err("Failed to add a buffer to the virtqueue: %d\n",
+                                 rc);
+                       break;
+               }
+               cnt++;
+       }
+       *count = *count - cnt;
+
+       /**
+        * Notify the host, when we submit new descriptor(s).
+        */
+       if (cnt)
+               virtqueue_host_notify(rxq->vq);
+
+       return rc;
+}
+
  static int virtio_netdev_xmit(struct uk_netdev *dev,
                              struct uk_netdev_tx_queue *queue,
                              struct uk_netbuf *pkt)
  {
+       struct virtio_net_device *vndev __unused;
+       struct virtio_net_hdr *vhdr;
+       int16_t header_sz = sizeof(*vhdr);
        int rc = 0;
+       size_t total_len = 0;
UK_ASSERT(dev);
        UK_ASSERT(pkt && queue);
+
+       vndev = to_virtionetdev(dev);
+       /**
+        * We are reclaiming the free descriptors from buffers. The function is
+        * not protected by means of locks. We need to be careful if there are
+        * multiple contexts through which we free the tx descriptors.
+        */
+       virtio_netdev_xmit_free(queue);
+
+       /**
+        * Fetch the virtio-net-header from a pre-allocated list.
+        */

This comment might be confusing. How about
"Claiming head space to place virtio-net-header later"?

+       rc = uk_netbuf_header(pkt, header_sz);
+       if (rc != 1) {
+               uk_pr_err("Failed to allocate header memory\n");
+               rc = -ENOSPC;
+               goto exit;
+       }
+       vhdr = pkt->data;
+
+       /**
+        * Fill the virtio-net-header with the necessary information.
+        * Unused fields are explicitly set to zero.
+        */
+       vhdr->flags = 0;
+       vhdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+       /**
+        * Prepare the sglist and enqueue the buffer to the virtio-ring.
+        */
+       uk_sglist_reset(&queue->sg);
+
+       /**
+        * According to section 5.1.6.6 of the specification, we need to
+        * explicitly use 2 descriptors for each transmit and receive network
+        * packet since we do not negotiate the VIRTIO_F_ANY_LAYOUT feature:
+        *
+        * one for the virtio header and the other for the actual network
+        * packet.
+        */
+       /* Appending the data to the list. */
+       rc = uk_sglist_append(&queue->sg, vhdr, header_sz);
+       if (unlikely(rc != 0)) {
+               uk_pr_err("Failed to append to the sg list\n");
+               goto exit;
+       }
+       rc = uk_sglist_append(&queue->sg, pkt->data + header_sz,
+                       (pkt->len - header_sz));
+       if (unlikely(rc != 0)) {
+               uk_pr_err("Failed to append to the sg list\n");
+               goto exit;
+       }
+       if (pkt->next) {
+               rc = uk_sglist_append_netbuf(&queue->sg, pkt->next);
+               if (unlikely(rc != 0)) {
+                       uk_pr_err("Failed to append to the sg list\n");
+                       goto exit;
+               }
+       }
+
+       total_len = uk_sglist_length(&queue->sg);
+       if (unlikely(total_len > VIRTIO_PKT_BUFFER_LEN)) {
+               uk_pr_err("Packet size: %lu, Max Allowed size:%u\n",
+                         total_len, VIRTIO_PKT_BUFFER_LEN);

I would correct the formatting of the error message slightly:

uk_pr_err("Packet size too big: %lu, max: %u\n",
          total_len, VIRTIO_PKT_BUFFER_LEN);

+               return -ENOTSUP;
+       }
+
+       /**
+        * Adding the descriptors to the virtqueue.
+        */
+       rc = virtqueue_buffer_enqueue(queue->vq, pkt, &queue->sg,
+                                     queue->sg.sg_nseg, 0);
+       if (likely(rc >= 0)) {
+               /**
+                * Notify the host the new buffer.
+                * Notify the host about the new buffer.
+               virtqueue_host_notify(queue->vq);
+               /**
+                * When there is further space available in the ring
+                * When there is further space available in the ring,
+                * return 2, else 1.
+               rc = likely(rc > 0) ? 2 : 1;
+       } else if (rc == -ENOSPC) {
+               uk_pr_debug("No more descriptors available\n");
+               rc = 0;
+       } else {
+               uk_pr_err("Failed to enqueue descriptors into the ring: %d\n",
+                         rc);
+       }
+
+exit:
        return rc;
  }
-static int virtio_netdev_recv(struct uk_netdev *dev __unused,
+static int virtio_netdev_rxq_enqueue(struct uk_netdev_rx_queue *rxq,
+                                    struct uk_netbuf *netbuf)
+{
+       int rc = 0;
+       struct virtio_net_rx_hdr *rxhdr;
+       __u8 *buf_start;
+       struct uk_sglist *sg;
+
+       if (virtqueue_is_full(rxq->vq)) {
+               uk_pr_debug("The virtqueue is full\n");
+               return -ENOSPC;
+       }
+       rxhdr = netbuf->data;
+       buf_start = netbuf->data + sizeof(*rxhdr);

Hmm... maybe we should use the packet buffer headroom function here. It would bring things in line with the tx code. For this, the libuknetdev API would need a comment to document it: the user would need to reserve nb_encap of struct uk_netdev_info as headroom bytes on allocation. Or would we need another encap field for rx_fillup packets? What do you think?
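
Roughly what I have in mind (untested sketch, assuming the same
uk_netbuf_header() semantics as on the tx path and enough headroom
reserved on allocation):

  rc = uk_netbuf_header(netbuf, sizeof(*rxhdr));
  if (unlikely(rc != 1))
    return -ENOSPC;
  rxhdr = netbuf->data;
  buf_start = netbuf->data + sizeof(*rxhdr);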

You do not check that your header operation is in range (netbuf->buflen), right?

+       sg = &rxq->sg;
+       uk_sglist_reset(sg);
+
+       /* Appending the header buffer to the sglist */
+       uk_sglist_append(sg, rxhdr, sizeof(struct virtio_net_hdr));
+
+       /* Appending the data buffer to the sglist */
+       uk_sglist_append(sg, buf_start,
+                        netbuf->len - sizeof(*rxhdr));
+
+       rc = virtqueue_buffer_enqueue(rxq->vq, netbuf, sg, 0, sg->sg_nseg);
+       return rc;
+}
+
+static int virtio_netdev_rxq_dequeue(struct uk_netdev_rx_queue *rxq,
+                                    struct uk_netbuf **netbuf)
+{
+       int rc = 0;
+       struct uk_netbuf *buf = NULL;
+       __u32 len;
+       __u32 offset = sizeof(struct virtio_net_rx_hdr);
+
+       UK_ASSERT(netbuf);
+
+       buf = (struct uk_netbuf *)virtqueue_buffer_dequeue(rxq->vq, &len);
+       if (!buf) {
+               uk_pr_debug("No data available in the queue\n");
+               *netbuf = NULL;
+               return 0;
+       }
+       if (unlikely((len < VIRTIO_HDR_LEN + ETH_HDR_LEN)
+                    || (len > VIRTIO_PKT_BUFFER_LEN))) {
+               uk_pr_err("Received invalid packet size:%u\n",

Add a space after the colon, i.e.:
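
  uk_pr_err("Received invalid packet size: %u\n", len);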

+                         len);
+               return -EINVAL;
+       }
+
+       /**
+        * Remove the virtio header from the buffer and adjust the length.
+        * We pad "VTNET_RX_HEADER_PAD" bytes to the rx buffer while enqueuing
+        * for alignment of the packet data. We compensate for this by adding
+        * the padding to the length on dequeue.
+        */
+       buf->len = len + VTNET_RX_HEADER_PAD;
+       rc = uk_netbuf_header(buf, -offset);
+       UK_ASSERT(rc == 1);
+       *netbuf = buf;
+
+       return 1;
+}
+
+static int virtio_netdev_recv(struct uk_netdev *dev,
                              struct uk_netdev_rx_queue *queue,
-                             struct uk_netbuf **pkt __unused,
+                             struct uk_netbuf **pkt,
                              struct uk_netbuf *fillup[],
                              uint16_t *fillup_count)
  {
        int rc = 0;
+       int cnt = 0;
UK_ASSERT(dev && queue);
        UK_ASSERT(!fillup || (fillup && *fillup_count > 0));
- return rc;
+       if (pkt && (queue->intr_enabled & VTNET_INTR_USR_EN_MASK)) {
+               virtqueue_intr_disable(queue->vq);
+               queue->intr_enabled &= ~(VTNET_INTR_EN);
+       }
+
+       if (pkt) {
+               rc = virtio_netdev_rxq_dequeue(queue, pkt);
+               if (unlikely(rc < 0)) {
+                       uk_pr_err("Failed to dequeue the packet: %d\n", rc);
+                       goto err_exit;
+               }
+               cnt = rc;
+       }
+       if (fillup)
+               virtio_netdev_rx_fillup(queue, fillup, fillup_count);
+
+       /* Enable the interrupt only when the user had previously enabled it */
+       if (pkt && (queue->intr_enabled & VTNET_INTR_USR_EN_MASK)) {
+               /* Need to enable the interrupt on the last packet */
+               rc = virtqueue_intr_enable(queue->vq);
+               if (rc == 1 && cnt == 0) {
+                       /**
+                        * A packet arrived after reading the queue and before
+                        * enabling the interrupt.
+                        */
+                       rc = virtio_netdev_rxq_dequeue(queue, pkt);
+                       if (unlikely(rc < 0)) {
+                               uk_pr_err("Failed to dequeue the packet: %d\n",
+                                         rc);
+                               goto err_exit;
+                       }
+                       /* Need to enable the interrupt on the last packet */
+                       rc = virtqueue_intr_enable(queue->vq);
+                       cnt = (rc == 1) ? 2 : 1;
+               } else if (cnt > 0) {
+                       /* When there is a packet in the buffer */
+                       cnt = (rc == 1) ? 2 : 1;
+               }
+       } else if (pkt && cnt > 0) {
+               /**
+                * For the polling case, we always report that there are
+                * further packets, unless the queue is empty.
+                */
+               cnt = 2;
+       }
+
+exit:
+       return cnt;
+
+err_exit:
+       cnt = rc;
+       goto exit;
  }
static struct uk_netdev_rx_queue *virtio_netdev_rx_queue_setup(

