[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: Possible bug? DOM-U network stopped working after fatal error reported in DOM0
On Sun, Dec 26, 2021 at 02:06:55AM +0800, G.R. wrote: > > > Thanks. I've raised this on freensd-net for advice [0]. IMO netfront > > > shouldn't receive an mbuf that crosses a page boundary, but if that's > > > indeed a legit mbuf I will figure out the best way to handle it. > > > > > > I have a clumsy patch (below) that might solve this, if you want to > > > give it a try. > > > > Applied the patch and it worked like a charm! > > Thank you so much for your quick help! > > Wish you a wonderful holiday! > > I may have said too quickly... > With the patch I can attach the iscsi disk and neither the dom0 nor > the NAS domU complains this time. > But when I attempt to mount the attached disk it reports I/O errors randomly. > By randomly I mean different disks behave differently... > I don't see any error logs from kernels this time. > (most of the iscsi disks are NTFS FS and mounted through the user mode > fuse library) > But since I have a local backup copy of the image, I can confirm that > mounting that backup image does not result in any I/O error. > Looks like something is still broken here... Indeed. That patch was likely too simple, and didn't properly handle the split of mbuf data buffers. I have another version based on using sglist, which I think it's also a worthwhile change for netfront. Can you please give it a try? I've done a very simple test and seems fine, but you certainly have more interesting cases. You will have to apply it on top of a clean tree, without any of the other patches applied. Thanks, Roger. --- diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c index 8dba5a8dc6d5..37ea7b1fa059 100644 --- a/sys/dev/xen/netfront/netfront.c +++ b/sys/dev/xen/netfront/netfront.c @@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" +#include <sys/types.h> + #include <sys/param.h> #include <sys/sockio.h> #include <sys/limits.h> @@ -40,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/module.h> #include <sys/kernel.h> +#include <sys/sglist.h> #include <sys/socket.h> #include <sys/sysctl.h> #include <sys/taskqueue.h> @@ -199,6 +202,12 @@ struct netfront_txq { struct taskqueue *tq; struct task defrtask; + struct sglist *segments; + struct mbuf_refcount { + struct m_tag tag; + u_int count; + } refcount_tag[NET_TX_RING_SIZE + 1]; + bool full; }; @@ -301,6 +310,38 @@ xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri) return (ref); } +#define MTAG_REFCOUNT 0 + +static void mbuf_grab(uint32_t cookie, struct mbuf *m) +{ + struct mbuf_refcount *ref; + + ref = (struct mbuf_refcount *)m_tag_locate(m, cookie, MTAG_REFCOUNT, + NULL); + KASSERT(ref != NULL, ("Cannot find refcount")); + ref->count++; +} + +static void mbuf_release(uint32_t cookie, struct mbuf *m) +{ + struct mbuf_refcount *ref; + + ref = (struct mbuf_refcount *)m_tag_locate(m, cookie, MTAG_REFCOUNT, + NULL); + KASSERT(ref != NULL, ("Cannot find refcount")); + KASSERT(ref->count > 0, ("Invalid reference count")); + + if(--ref->count == 0) + m_freem(m); +} + +static void tag_free(struct m_tag *t) +{ + struct mbuf_refcount *ref = (struct mbuf_refcount *)t; + + KASSERT(ref->count == 0, ("Free mbuf tag with pending refcnt")); +} + #define IPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #ifdef INVARIANTS @@ -778,7 +819,7 @@ disconnect_txq(struct netfront_txq *txq) static void destroy_txq(struct netfront_txq *txq) { - + sglist_free(txq->segments); free(txq->ring.sring, M_DEVBUF); buf_ring_free(txq->br, M_DEVBUF); taskqueue_drain_all(txq->tq); @@ -826,6 +867,11 @@ setup_txqs(device_t dev, struct netfront_info *info, for (i = 0; i <= NET_TX_RING_SIZE; i++) { txq->mbufs[i] = (void *) ((u_long) i+1); txq->grant_ref[i] = GRANT_REF_INVALID; + m_tag_setup(&txq->refcount_tag[i].tag, + (unsigned long)txq, MTAG_REFCOUNT, + sizeof(txq->refcount_tag[i]) - + sizeof(txq->refcount_tag[i].tag)); + txq->refcount_tag[i].tag.m_tag_free = &tag_free; } txq->mbufs[NET_TX_RING_SIZE] = (void *)0; @@ -874,10 +920,18 @@ setup_txqs(device_t dev, struct netfront_info *info, device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n"); goto fail_bind_port; } + + txq->segments = sglist_alloc(MAX_TX_REQ_FRAGS, M_WAITOK); + if (txq->segments == NULL) { + device_printf(dev, "failed to allocate sglist\n"); + goto fail_sglist; + } } return (0); +fail_sglist: + xen_intr_unbind(&txq->xen_intr_handle); fail_bind_port: taskqueue_drain_all(txq->tq); fail_start_thread: @@ -1041,7 +1095,7 @@ xn_release_tx_bufs(struct netfront_txq *txq) if (txq->mbufs_cnt < 0) { panic("%s: tx_chain_cnt must be >= 0", __func__); } - m_free(m); + mbuf_release((unsigned long)txq, m); } } @@ -1311,7 +1365,7 @@ xn_txeof(struct netfront_txq *txq) txq->mbufs[id] = NULL; add_id_to_freelist(txq->mbufs, id); txq->mbufs_cnt--; - m_free(m); + mbuf_release((unsigned long)txq, m); /* Only mark the txq active if we've freed up at least one slot to try */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } @@ -1507,22 +1561,6 @@ xn_get_responses(struct netfront_rxq *rxq, return (err); } -/** - * \brief Count the number of fragments in an mbuf chain. - * - * Surprisingly, there isn't an M* macro for this. - */ -static inline int -xn_count_frags(struct mbuf *m) -{ - int nfrags; - - for (nfrags = 0; m != NULL; m = m->m_next) - nfrags++; - - return (nfrags); -} - /** * Given an mbuf chain, make sure we have enough room and then push * it onto the transmit ring. @@ -1530,16 +1568,22 @@ xn_count_frags(struct mbuf *m) static int xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) { - struct mbuf *m; struct netfront_info *np = txq->info; struct ifnet *ifp = np->xn_ifp; - u_int nfrags; - int otherend_id; + u_int nfrags, i; + int otherend_id, rc; + + sglist_reset(txq->segments); + rc = sglist_append_mbuf(txq->segments, m_head); + if (rc != 0) { + m_freem(m_head); + return (rc); + } /** * Defragment the mbuf if necessary. */ - nfrags = xn_count_frags(m_head); + nfrags = txq->segments->sg_nseg; /* * Check to see whether this request is longer than netback @@ -1551,6 +1595,8 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) * the Linux network stack. */ if (nfrags > np->maxfrags) { + struct mbuf *m; + m = m_defrag(m_head, M_NOWAIT); if (!m) { /* @@ -1561,11 +1607,15 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) return (EMSGSIZE); } m_head = m; + sglist_reset(txq->segments); + rc = sglist_append_mbuf(txq->segments, m_head); + if (rc != 0) { + m_freem(m_head); + return (rc); + } + nfrags = txq->segments->sg_nseg; } - /* Determine how many fragments now exist */ - nfrags = xn_count_frags(m_head); - /* * Check to see whether the defragmented packet has too many * segments for the Linux netback driver. @@ -1604,14 +1654,15 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ - m = m_head; otherend_id = xenbus_get_otherend_id(np->xbdev); - for (m = m_head; m; m = m->m_next) { + for (i = 0; i < nfrags; i++) { netif_tx_request_t *tx; uintptr_t id; grant_ref_t ref; u_long mfn; /* XXX Wrong type? */ + struct sglist_seg *seg; + seg = &txq->segments->sg_segs[i]; tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt); id = get_id_from_freelist(txq->mbufs); if (id == 0) @@ -1621,17 +1672,22 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) if (txq->mbufs_cnt > NET_TX_RING_SIZE) panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", __func__); - txq->mbufs[id] = m; + if (i == 0) + m_tag_prepend(m_head, &txq->refcount_tag[id].tag); + mbuf_grab((unsigned long)txq, m_head); + txq->mbufs[id] = m_head; tx->id = id; ref = gnttab_claim_grant_reference(&txq->gref_head); KASSERT((short)ref >= 0, ("Negative ref")); - mfn = virt_to_mfn(mtod(m, vm_offset_t)); + mfn = atop(seg->ss_paddr); gnttab_grant_foreign_access_ref(ref, otherend_id, mfn, GNTMAP_readonly); tx->gref = txq->grant_ref[id] = ref; - tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); + tx->offset = seg->ss_paddr & PAGE_MASK; + KASSERT(tx->offset + seg->ss_len <= PAGE_SIZE, + ("mbuf segment crosses a page boundary")); tx->flags = 0; - if (m == m_head) { + if (i == 0) { /* * The first fragment has the entire packet * size, subsequent fragments have just the @@ -1640,7 +1696,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) * subtracting the sizes of the other * fragments. */ - tx->size = m->m_pkthdr.len; + tx->size = m_head->m_pkthdr.len; /* * The first fragment contains the checksum flags @@ -1654,12 +1710,12 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) * so we have to test for CSUM_TSO * explicitly. */ - if (m->m_pkthdr.csum_flags + if (m_head->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_TSO)) { tx->flags |= (NETTXF_csum_blank | NETTXF_data_validated); } - if (m->m_pkthdr.csum_flags & CSUM_TSO) { + if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { struct netif_extra_info *gso = (struct netif_extra_info *) RING_GET_REQUEST(&txq->ring, @@ -1667,7 +1723,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) tx->flags |= NETTXF_extra_info; - gso->u.gso.size = m->m_pkthdr.tso_segsz; + gso->u.gso.size = m_head->m_pkthdr.tso_segsz; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; @@ -1677,9 +1733,9 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head) gso->flags = 0; } } else { - tx->size = m->m_len; + tx->size = seg->ss_len; } - if (m->m_next) + if (i != nfrags - 1) tx->flags |= NETTXF_more_data; txq->ring.req_prod_pvt++;
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |