Re: [Xen-devel] [PATCH 2/3] xen-netback: switch to per-cpu scratch space
On Mon, May 27, 2013 at 12:29:42PM +0100, Wei Liu wrote:
> There are at most nr_online_cpus netback threads running. We can make use
> of per-cpu scratch space to reduce the size of buffer space when we move
> to 1:1 model.
>
> In the unlikely event when per-cpu scratch space is not available,
> processing routines will refuse to run on that CPU.
>
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
> drivers/net/xen-netback/netback.c | 247 ++++++++++++++++++++++++++++++-------
> 1 file changed, 204 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 54853be..0f69eda 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -37,6 +37,7 @@
> #include <linux/kthread.h>
> #include <linux/if_vlan.h>
> #include <linux/udp.h>
> +#include <linux/cpu.h>
>
> #include <net/tcp.h>
>
> @@ -95,6 +96,24 @@ struct netbk_rx_meta {
>
> #define MAX_BUFFER_OFFSET PAGE_SIZE
>
> +/* Coalescing tx requests before copying makes number of grant
> + * copy ops greater or equal to number of slots required. In
> + * worst case a tx request consumes 2 gnttab_copy. So the size
> + * of tx_copy_ops array should be 2*MAX_PENDING_REQS.
> + */
> +#define TX_COPY_OPS_SIZE (2*MAX_PENDING_REQS)
> +DEFINE_PER_CPU(struct gnttab_copy *, tx_copy_ops);
This should be static.
> +
> +/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
> + * head/fragment page uses 2 copy operations because it
> + * straddles two buffers in the frontend. So the size of following
> + * arrays should be 2*XEN_NETIF_RX_RING_SIZE.
> + */
> +#define GRANT_COPY_OP_SIZE (2*XEN_NETIF_RX_RING_SIZE)
> +#define META_SIZE (2*XEN_NETIF_RX_RING_SIZE)
> +DEFINE_PER_CPU(struct gnttab_copy *, grant_copy_op);
> +DEFINE_PER_CPU(struct netbk_rx_meta *, meta);
Both of these should be static as well.
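I.e. just (assuming nothing outside netback.c needs them, which seems to be the case in this patch):

static DEFINE_PER_CPU(struct gnttab_copy *, tx_copy_ops);
static DEFINE_PER_CPU(struct gnttab_copy *, grant_copy_op);
static DEFINE_PER_CPU(struct netbk_rx_meta *, meta);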
> +
> struct xen_netbk {
> wait_queue_head_t wq;
> struct task_struct *task;
> @@ -116,21 +135,7 @@ struct xen_netbk {
> atomic_t netfront_count;
>
> struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
> - /* Coalescing tx requests before copying makes number of grant
> - * copy ops greater or equal to number of slots required. In
> - * worst case a tx request consumes 2 gnttab_copy.
> - */
> - struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
> -
> u16 pending_ring[MAX_PENDING_REQS];
> -
> - /*
> - * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
> - * head/fragment page uses 2 copy operations because it
> - * straddles two buffers in the frontend.
> - */
> - struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
> - struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
> };
>
> static struct xen_netbk *xen_netbk;
> @@ -608,12 +613,31 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> int count;
> unsigned long offset;
> struct skb_cb_overlay *sco;
> + struct gnttab_copy *gco = get_cpu_var(grant_copy_op);
> + struct netbk_rx_meta *m = get_cpu_var(meta);
> + static int unusable_count;
>
> struct netrx_pending_operations npo = {
> - .copy = netbk->grant_copy_op,
> - .meta = netbk->meta,
> + .copy = gco,
> + .meta = m,
> };
>
> + if (gco == NULL || m == NULL) {
> + put_cpu_var(grant_copy_op);
> + put_cpu_var(meta);
> + if (unusable_count == 1000) {
printk_ratelimited() instead of the open-coded unusable_count?
> + printk(KERN_ALERT
> + "xen-netback: "
> + "CPU %d scratch space is not available,"
> + " not doing any TX work for netback/%d\n",
> + smp_processor_id(),
> + (int)(netbk - xen_netbk));
So ... are you going to retry it? Drop it? Can you include in the message the
mechanism by which you are going to recover? (One possible shape is sketched below.)
> + unusable_count = 0;
> + } else
> + unusable_count++;
> + return;
> + }
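Untested sketch of what I have in mind -- this assumes the intended recovery is
simply that the packets stay on netbk->rx_queue until a later pass (one that does
have scratch space) picks them up, and it uses printk_ratelimited() instead of the
hand-rolled counter:

	if (gco == NULL || m == NULL) {
		put_cpu_var(grant_copy_op);
		put_cpu_var(meta);
		/* Nothing has been dequeued yet, so the skbs stay on
		 * netbk->rx_queue and get retried on a later pass.
		 */
		printk_ratelimited(KERN_ALERT
			"xen-netback: CPU %d scratch space is not available, deferring work for netback/%d\n",
			smp_processor_id(), (int)(netbk - xen_netbk));
		return;
	}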
> +
> skb_queue_head_init(&rxq);
>
> count = 0;
> @@ -635,27 +659,30 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> break;
> }
>
> - BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
> + BUG_ON(npo.meta_prod > META_SIZE);
>
> - if (!npo.copy_prod)
> + if (!npo.copy_prod) {
> + put_cpu_var(grant_copy_op);
> + put_cpu_var(meta);
> return;
> + }
>
> - BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
> - gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);
> + BUG_ON(npo.copy_prod > GRANT_COPY_OP_SIZE);
> + gnttab_batch_copy(gco, npo.copy_prod);
>
> while ((skb = __skb_dequeue(&rxq)) != NULL) {
> sco = (struct skb_cb_overlay *)skb->cb;
>
> vif = netdev_priv(skb->dev);
>
> - if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
> + if (m[npo.meta_cons].gso_size && vif->gso_prefix) {
> resp = RING_GET_RESPONSE(&vif->rx,
> vif->rx.rsp_prod_pvt++);
>
> resp->flags = XEN_NETRXF_gso_prefix |
> XEN_NETRXF_more_data;
>
> - resp->offset = netbk->meta[npo.meta_cons].gso_size;
> - resp->id = netbk->meta[npo.meta_cons].id;
> + resp->offset = m[npo.meta_cons].gso_size;
> + resp->id = m[npo.meta_cons].id;
> resp->status = sco->meta_slots_used;
>
> npo.meta_cons++;
> @@ -680,12 +707,12 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> flags |= XEN_NETRXF_data_validated;
>
> offset = 0;
> - resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
> + resp = make_rx_response(vif, m[npo.meta_cons].id,
> status, offset,
> - netbk->meta[npo.meta_cons].size,
> + m[npo.meta_cons].size,
> flags);
>
> - if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
> + if (m[npo.meta_cons].gso_size && !vif->gso_prefix) {
> struct xen_netif_extra_info *gso =
> (struct xen_netif_extra_info *)
> RING_GET_RESPONSE(&vif->rx,
> @@ -693,7 +720,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>
> resp->flags |= XEN_NETRXF_extra_info;
>
> - gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
> + gso->u.gso.size = m[npo.meta_cons].gso_size;
> gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
> gso->u.gso.pad = 0;
> gso->u.gso.features = 0;
> @@ -703,7 +730,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> }
>
> netbk_add_frag_responses(vif, status,
> - netbk->meta + npo.meta_cons + 1,
> + m + npo.meta_cons + 1,
> sco->meta_slots_used);
>
> RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
> @@ -726,6 +753,9 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> if (!skb_queue_empty(&netbk->rx_queue) &&
> !timer_pending(&netbk->net_timer))
> xen_netbk_kick_thread(netbk);
> +
> + put_cpu_var(grant_copy_op);
> + put_cpu_var(meta);
> }
>
> void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
> @@ -1351,9 +1381,10 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
> return false;
> }
>
> -static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> +static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk,
> + struct gnttab_copy *tco)
> {
> - struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
> + struct gnttab_copy *gop = tco, *request_gop;
> struct sk_buff *skb;
> int ret;
>
> @@ -1531,16 +1562,17 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> vif->tx.req_cons = idx;
> xen_netbk_check_rx_xenvif(vif);
>
> - if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
> + if ((gop-tco) >= TX_COPY_OPS_SIZE)
> break;
> }
>
> - return gop - netbk->tx_copy_ops;
> + return gop - tco;
> }
>
> -static void xen_netbk_tx_submit(struct xen_netbk *netbk)
> +static void xen_netbk_tx_submit(struct xen_netbk *netbk,
> + struct gnttab_copy *tco)
> {
> - struct gnttab_copy *gop = netbk->tx_copy_ops;
> + struct gnttab_copy *gop = tco;
> struct sk_buff *skb;
>
> while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
> @@ -1615,15 +1647,37 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
> static void xen_netbk_tx_action(struct xen_netbk *netbk)
> {
> unsigned nr_gops;
> + struct gnttab_copy *tco;
> + static int unusable_count;
> +
> + tco = get_cpu_var(tx_copy_ops);
> +
> + if (tco == NULL) {
> + put_cpu_var(tx_copy_ops);
> + if (unusable_count == 1000) {
> + printk(KERN_ALERT
Ditto. printk_ratelimited.
> + "xen-netback: "
> + "CPU %d scratch space is not available,"
> + " not doing any RX work for netback/%d\n",
> + smp_processor_id(),
> + (int)(netbk - xen_netbk));
And can you explain what the recovery mechanism is?
> + } else
> + unusable_count++;
> + return;
> + }
>
> - nr_gops = xen_netbk_tx_build_gops(netbk);
> + nr_gops = xen_netbk_tx_build_gops(netbk, tco);
>
> - if (nr_gops == 0)
> + if (nr_gops == 0) {
> + put_cpu_var(tx_copy_ops);
> return;
> + }
> +
> + gnttab_batch_copy(tco, nr_gops);
>
> - gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);
> + xen_netbk_tx_submit(netbk, tco);
>
> - xen_netbk_tx_submit(netbk);
> + put_cpu_var(tx_copy_ops);
> }
>
> static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
> @@ -1760,6 +1814,93 @@ static int xen_netbk_kthread(void *data)
> return 0;
> }
>
> +static int __create_percpu_scratch_space(unsigned int cpu)
> +{
> + if (per_cpu(tx_copy_ops, cpu) ||
> + per_cpu(grant_copy_op, cpu) ||
> + per_cpu(meta, cpu))
> + return 0;
> +
> + per_cpu(tx_copy_ops, cpu) =
> + vzalloc_node(sizeof(struct gnttab_copy) * TX_COPY_OPS_SIZE,
> + cpu_to_node(cpu));
> +
> + per_cpu(grant_copy_op, cpu) =
> + vzalloc_node(sizeof(struct gnttab_copy) * GRANT_COPY_OP_SIZE,
> + cpu_to_node(cpu));
> +
> + per_cpu(meta, cpu) =
> + vzalloc_node(sizeof(struct netbk_rx_meta) * META_SIZE,
> + cpu_to_node(cpu));
> +
> + if (!per_cpu(tx_copy_ops, cpu) ||
> + !per_cpu(grant_copy_op, cpu) ||
> + !per_cpu(meta, cpu))
> + return -ENOMEM;
And no freeing? Ah, you require __free_percpu_scratch_space to
do the job for you. Um, why not do it here instead of depending
on the caller to clean up the mess? Say:
	{
		__free_percpu_scratch_space(cpu);
		return -ENOMEM;
	}
?
> +
> + return 0;
> +}
> +
> +static void __free_percpu_scratch_space(unsigned int cpu)
> +{
> + void *tmp;
> +
> + tmp = per_cpu(tx_copy_ops, cpu);
> + per_cpu(tx_copy_ops, cpu) = NULL;
> + vfree(tmp);
> +
> + tmp = per_cpu(grant_copy_op, cpu);
> + per_cpu(grant_copy_op, cpu) = NULL;
> + vfree(tmp);
> +
> + tmp = per_cpu(meta, cpu);
> + per_cpu(meta, cpu) = NULL;
> + vfree(tmp);
> +}
> +
> +static int __netback_percpu_callback(struct notifier_block *nfb,
> + unsigned long action, void *hcpu)
> +{
> + unsigned int cpu = (unsigned long)hcpu;
> + int rc = NOTIFY_DONE;
> +
> + switch (action) {
> + case CPU_ONLINE:
> + case CPU_ONLINE_FROZEN:
> + printk(KERN_INFO "xen-netback: CPU %d online, creating scratch space\n",
> + cpu);
I think this is more of a pr_debug kind of message.
> + rc = __create_percpu_scratch_space(cpu);
> + if (rc) {
> + printk(KERN_ALERT "xen-netback: failed to create scratch space for CPU %d\n",
> + cpu);
> + /* There is really nothing more we can do. Free any
> + * partially allocated scratch space. When processing
> + * routines get to run they will just print warning
> + * message and stop processing.
> + */
> + __free_percpu_scratch_space(cpu);
Ugh. Could the code skip creating a kthread on a CPU for which
per_cpu(meta, cpu) == NULL?
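Very rough sketch of the idea for the init path (hypothetical; the error/exit
paths would then have to cope with netbk->task being NULL, and the hotplug case
would need similar treatment):

	for (group = 0; group < xen_netbk_group_nr; group++) {
		struct xen_netbk *netbk = &xen_netbk[group];

		/* This CPU has no scratch space -- a kthread bound to it
		 * could never make progress, so don't create one.
		 */
		if (per_cpu(meta, group) == NULL) {
			netbk->task = NULL;
			continue;
		}

		/* ... existing timer / kthread_create() / kthread_bind()
		 * setup for this group ...
		 */
	}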
> + rc = NOTIFY_BAD;
> + } else
> + rc = NOTIFY_OK;
> + break;
> + case CPU_DEAD:
> + case CPU_DEAD_FROZEN:
> + printk(KERN_INFO "xen-netback: CPU %d offline, destroying scratch space\n",
> + cpu);
> + __free_percpu_scratch_space(cpu);
> + rc = NOTIFY_OK;
> + break;
> + default:
> + break;
> + }
> +
> + return rc;
> +}
> +
> +static struct notifier_block netback_notifier_block = {
> + .notifier_call = __netback_percpu_callback,
> +};
> +
> void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
> {
> if (vif->tx.sring)
> @@ -1810,6 +1951,7 @@ static int __init netback_init(void)
> int i;
> int rc = 0;
> int group;
> + int cpu;
>
> if (!xen_domain())
> return -ENODEV;
> @@ -1821,10 +1963,21 @@ static int __init netback_init(void)
> fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
> }
>
> + for_each_online_cpu(cpu) {
> + rc = __create_percpu_scratch_space(cpu);
> + if (rc) {
> + rc = -ENOMEM;
> + goto failed_init;
> + }
> + }
> + register_hotcpu_notifier(&netback_notifier_block);
> +
> xen_netbk_group_nr = num_online_cpus();
> xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
> - if (!xen_netbk)
> - return -ENOMEM;
> + if (!xen_netbk) {
> + rc = -ENOMEM;
> + goto failed_init;
> + }
>
> for (group = 0; group < xen_netbk_group_nr; group++) {
> struct xen_netbk *netbk = &xen_netbk[group];
> @@ -1849,7 +2002,7 @@ static int __init netback_init(void)
> printk(KERN_ALERT "kthread_create() fails at netback\n");
> del_timer(&netbk->net_timer);
> rc = PTR_ERR(netbk->task);
> - goto failed_init;
> + goto failed_init_destroy_kthreads;
> }
>
> kthread_bind(netbk->task, group);
> @@ -1865,17 +2018,20 @@ static int __init netback_init(void)
>
> rc = xenvif_xenbus_init();
> if (rc)
> - goto failed_init;
> + goto failed_init_destroy_kthreads;
>
> return 0;
>
> -failed_init:
> +failed_init_destroy_kthreads:
> while (--group >= 0) {
> struct xen_netbk *netbk = &xen_netbk[group];
> del_timer(&netbk->net_timer);
> kthread_stop(netbk->task);
> }
> vfree(xen_netbk);
> +failed_init:
> + for_each_online_cpu(cpu)
> + __free_percpu_scratch_space(cpu);
> return rc;
>
> }
> @@ -1899,6 +2055,11 @@ static void __exit netback_fini(void)
> }
>
> vfree(xen_netbk);
> +
> + unregister_hotcpu_notifier(&netback_notifier_block);
> +
> + for_each_online_cpu(i)
> + __free_percpu_scratch_space(i);
> }
> module_exit(netback_fini);
>
> --
> 1.7.10.4
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel