Re: [Xen-devel] [PATCH 2/3] xen-netback: switch to per-cpu scratch space
On Mon, May 27, 2013 at 12:29:42PM +0100, Wei Liu wrote:
> There are at most nr_online_cpus netback threads running. We can make use
> of per-cpu scratch space to reduce the size of buffer space when we move
> to 1:1 model.
>
> In the unlikely event when per-cpu scratch space is not available,
> processing routines will refuse to run on that CPU.
>
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
> drivers/net/xen-netback/netback.c | 247 ++++++++++++++++++++++++++++++-------
> 1 file changed, 204 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 54853be..0f69eda 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -37,6 +37,7 @@
> #include <linux/kthread.h>
> #include <linux/if_vlan.h>
> #include <linux/udp.h>
> +#include <linux/cpu.h>
>
> #include <net/tcp.h>
>
> @@ -95,6 +96,24 @@ struct netbk_rx_meta {
>
> #define MAX_BUFFER_OFFSET PAGE_SIZE
>
> +/* Coalescing tx requests before copying makes number of grant
> + * copy ops greater or equal to number of slots required. In
> + * worst case a tx request consumes 2 gnttab_copy. So the size
> + * of tx_copy_ops array should be 2*MAX_PENDING_REQS.
> + */
> +#define TX_COPY_OPS_SIZE (2*MAX_PENDING_REQS)
> +DEFINE_PER_CPU(struct gnttab_copy *, tx_copy_ops);
This should be static.
> +
> +/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
> + * head/fragment page uses 2 copy operations because it
> + * straddles two buffers in the frontend. So the size of following
> + * arrays should be 2*XEN_NETIF_RX_RING_SIZE.
> + */
> +#define GRANT_COPY_OP_SIZE (2*XEN_NETIF_RX_RING_SIZE)
> +#define META_SIZE (2*XEN_NETIF_RX_RING_SIZE)
> +DEFINE_PER_CPU(struct gnttab_copy *, grant_copy_op);
> +DEFINE_PER_CPU(struct netbk_rx_meta *, meta);
Both of these should be static as well.
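I.e. just (assuming nothing outside netback.c needs them, which seems to be the case in this patch):

static DEFINE_PER_CPU(struct gnttab_copy *, tx_copy_ops);
static DEFINE_PER_CPU(struct gnttab_copy *, grant_copy_op);
static DEFINE_PER_CPU(struct netbk_rx_meta *, meta);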
> +
> struct xen_netbk {
> wait_queue_head_t wq;
> struct task_struct *task;
> @@ -116,21 +135,7 @@ struct xen_netbk {
> atomic_t netfront_count;
>
> struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
> - /* Coalescing tx requests before copying makes number of grant
> - * copy ops greater or equal to number of slots required. In
> - * worst case a tx request consumes 2 gnttab_copy.
> - */
> - struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
> -
> u16 pending_ring[MAX_PENDING_REQS];
> -
> - /*
> - * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
> - * head/fragment page uses 2 copy operations because it
> - * straddles two buffers in the frontend.
> - */
> - struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
> - struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
> };
>
> static struct xen_netbk *xen_netbk;
> @@ -608,12 +613,31 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> int count;
> unsigned long offset;
> struct skb_cb_overlay *sco;
> + struct gnttab_copy *gco = get_cpu_var(grant_copy_op);
> + struct netbk_rx_meta *m = get_cpu_var(meta);
> + static int unusable_count;
>
> struct netrx_pending_operations npo = {
> - .copy = netbk->grant_copy_op,
> - .meta = netbk->meta,
> + .copy = gco,
> + .meta = m,
> };
>
> + if (gco == NULL || m == NULL) {
> + put_cpu_var(grant_copy_op);
> + put_cpu_var(meta);
> + if (unusable_count == 1000) {
printk_ratelimited() instead of the open-coded unusable_count?
> + printk(KERN_ALERT
> + "xen-netback: "
> + "CPU %d scratch space is not available,"
> + " not doing any TX work for netback/%d\n",
> + smp_processor_id(),
> + (int)(netbk - xen_netbk));
So ... are you going to retry it? Drop it? Can you include in the message the
mechanism by which you are going to recover? (One possible shape is sketched below.)
> + unusable_count = 0;
> + } else
> + unusable_count++;
> + return;
> + }
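Untested sketch of what I have in mind -- this assumes the intended recovery is
simply that the packets stay on netbk->rx_queue until a later pass (one that does
have scratch space) picks them up, and it uses printk_ratelimited() instead of the
hand-rolled counter:

	if (gco == NULL || m == NULL) {
		put_cpu_var(grant_copy_op);
		put_cpu_var(meta);
		/* Nothing has been dequeued yet, so the skbs stay on
		 * netbk->rx_queue and get retried on a later pass.
		 */
		printk_ratelimited(KERN_ALERT
			"xen-netback: CPU %d scratch space is not available, deferring work for netback/%d\n",
			smp_processor_id(), (int)(netbk - xen_netbk));
		return;
	}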
> +
> skb_queue_head_init(&rxq);
>
> count = 0;
> @@ -635,27 +659,30 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> break;
> }
>
> - BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
> + BUG_ON(npo.meta_prod > META_SIZE);
>
> - if (!npo.copy_prod)
> + if (!npo.copy_prod) {
> + put_cpu_var(grant_copy_op);
> + put_cpu_var(meta);
> return;
> + }
>
> - BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
> - gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);
> + BUG_ON(npo.copy_prod > GRANT_COPY_OP_SIZE);
> + gnttab_batch_copy(gco, npo.copy_prod);
>
> while ((skb = __skb_dequeue(&rxq)) != NULL) {
> sco = (struct skb_cb_overlay *)skb->cb;
>
> vif = netdev_priv(skb->dev);
>
> - if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
> + if (m[npo.meta_cons].gso_size && vif->gso_prefix) {
> resp = RING_GET_RESPONSE(&vif->rx,
> vif->rx.rsp_prod_pvt++);
>
> resp->flags = XEN_NETRXF_gso_prefix |
> XEN_NETRXF_more_data;
>
> - resp->offset = netbk->meta[npo.meta_cons].gso_size;
> - resp->id = netbk->meta[npo.meta_cons].id;
> + resp->offset = m[npo.meta_cons].gso_size;
> + resp->id = m[npo.meta_cons].id;
> resp->status = sco->meta_slots_used;
>
> npo.meta_cons++;
> @@ -680,12 +707,12 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> flags |= XEN_NETRXF_data_validated;
>
> offset = 0;
> - resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
> + resp = make_rx_response(vif, m[npo.meta_cons].id,
> status, offset,
> - netbk->meta[npo.meta_cons].size,
> + m[npo.meta_cons].size,
> flags);
>
> - if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
> + if (m[npo.meta_cons].gso_size && !vif->gso_prefix) {
> struct xen_netif_extra_info *gso =
> (struct xen_netif_extra_info *)
> RING_GET_RESPONSE(&vif->rx,
> @@ -693,7 +720,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>
> resp->flags |= XEN_NETRXF_extra_info;
>
> - gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
> + gso->u.gso.size = m[npo.meta_cons].gso_size;
> gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
> gso->u.gso.pad = 0;
> gso->u.gso.features = 0;
> @@ -703,7 +730,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> }
>
> netbk_add_frag_responses(vif, status,
> - netbk->meta + npo.meta_cons + 1,
> + m + npo.meta_cons + 1,
> sco->meta_slots_used);
>
> RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
> @@ -726,6 +753,9 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> if (!skb_queue_empty(&netbk->rx_queue) &&
> !timer_pending(&netbk->net_timer))
> xen_netbk_kick_thread(netbk);
> +
> + put_cpu_var(grant_copy_op);
> + put_cpu_var(meta);
> }
>
> void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
> @@ -1351,9 +1381,10 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
> return false;
> }
>
> -static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> +static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk,
> + struct gnttab_copy *tco)
> {
> - struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
> + struct gnttab_copy *gop = tco, *request_gop;
> struct sk_buff *skb;
> int ret;
>
> @@ -1531,16 +1562,17 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> vif->tx.req_cons = idx;
> xen_netbk_check_rx_xenvif(vif);
>
> - if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
> + if ((gop-tco) >= TX_COPY_OPS_SIZE)
> break;
> }
>
> - return gop - netbk->tx_copy_ops;
> + return gop - tco;
> }
>
> -static void xen_netbk_tx_submit(struct xen_netbk *netbk)
> +static void xen_netbk_tx_submit(struct xen_netbk *netbk,
> + struct gnttab_copy *tco)
> {
> - struct gnttab_copy *gop = netbk->tx_copy_ops;
> + struct gnttab_copy *gop = tco;
> struct sk_buff *skb;
>
> while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
> @@ -1615,15 +1647,37 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
> static void xen_netbk_tx_action(struct xen_netbk *netbk)
> {
> unsigned nr_gops;
> + struct gnttab_copy *tco;
> + static int unusable_count;
> +
> + tco = get_cpu_var(tx_copy_ops);
> +
> + if (tco == NULL) {
> + put_cpu_var(tx_copy_ops);
> + if (unusable_count == 1000) {
> + printk(KERN_ALERT
Ditto. printk_ratelimited.
> + "xen-netback: "
> + "CPU %d scratch space is not available,"
> + " not doing any RX work for netback/%d\n",
> + smp_processor_id(),
> + (int)(netbk - xen_netbk));
And can you explain what the recovery mechanism is?
> + } else
> + unusable_count++;
> + return;
> + }
>
> - nr_gops = xen_netbk_tx_build_gops(netbk);
> + nr_gops = xen_netbk_tx_build_gops(netbk, tco);
>
> - if (nr_gops == 0)
> + if (nr_gops == 0) {
> + put_cpu_var(tx_copy_ops);
> return;
> + }
> +
> + gnttab_batch_copy(tco, nr_gops);
>
> - gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);
> + xen_netbk_tx_submit(netbk, tco);
>
> - xen_netbk_tx_submit(netbk);
> + put_cpu_var(tx_copy_ops);
> }
>
> static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
> @@ -1760,6 +1814,93 @@ static int xen_netbk_kthread(void *data)
> return 0;
> }
>
> +static int __create_percpu_scratch_space(unsigned int cpu)
> +{
> + if (per_cpu(tx_copy_ops, cpu) ||
> + per_cpu(grant_copy_op, cpu) ||
> + per_cpu(meta, cpu))
> + return 0;
> +
> + per_cpu(tx_copy_ops, cpu) =
> + vzalloc_node(sizeof(struct gnttab_copy) * TX_COPY_OPS_SIZE,
> + cpu_to_node(cpu));
> +
> + per_cpu(grant_copy_op, cpu) =
> + vzalloc_node(sizeof(struct gnttab_copy) * GRANT_COPY_OP_SIZE,
> + cpu_to_node(cpu));
> +
> + per_cpu(meta, cpu) =
> + vzalloc_node(sizeof(struct netbk_rx_meta) * META_SIZE,
> + cpu_to_node(cpu));
> +
> + if (!per_cpu(tx_copy_ops, cpu) ||
> + !per_cpu(grant_copy_op, cpu) ||
> + !per_cpu(meta, cpu))
> + return -ENOMEM;
And no freeing? Ah, you require __free_percpu_scratch_space to
do the job for you. Um, why not do it here instead of depending
on the caller to clean up the mess? Say:
	{
		__free_percpu_scratch_space(cpu);
		return -ENOMEM;
	}
?
> +
> + return 0;
> +}
> +
> +static void __free_percpu_scratch_space(unsigned int cpu)
> +{
> + void *tmp;
> +
> + tmp = per_cpu(tx_copy_ops, cpu);
> + per_cpu(tx_copy_ops, cpu) = NULL;
> + vfree(tmp);
> +
> + tmp = per_cpu(grant_copy_op, cpu);
> + per_cpu(grant_copy_op, cpu) = NULL;
> + vfree(tmp);
> +
> + tmp = per_cpu(meta, cpu);
> + per_cpu(meta, cpu) = NULL;
> + vfree(tmp);
> +}
> +
> +static int __netback_percpu_callback(struct notifier_block *nfb,
> + unsigned long action, void *hcpu)
> +{
> + unsigned int cpu = (unsigned long)hcpu;
> + int rc = NOTIFY_DONE;
> +
> + switch (action) {
> + case CPU_ONLINE:
> + case CPU_ONLINE_FROZEN:
> + printk(KERN_INFO "xen-netback: CPU %d online, creating scratch space\n",
> + cpu);
I think this is more of a pr_debug kind of message.
> + rc = __create_percpu_scratch_space(cpu);
> + if (rc) {
> + printk(KERN_ALERT "xen-netback: failed to create scratch space for CPU %d\n",
> + cpu);
> + /* There is really nothing more we can do. Free any
> + * partially allocated scratch space. When processing
> + * routines get to run they will just print warning
> + * message and stop processing.
> + */
> + __free_percpu_scratch_space(cpu);
Ugh. Could the code skip creating a kthread on a CPU for which
per_cpu(meta, cpu) == NULL?
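Very rough sketch of the idea for the init path (hypothetical; the error/exit
paths would then have to cope with netbk->task being NULL, and the hotplug case
would need similar treatment):

	for (group = 0; group < xen_netbk_group_nr; group++) {
		struct xen_netbk *netbk = &xen_netbk[group];

		/* This CPU has no scratch space -- a kthread bound to it
		 * could never make progress, so don't create one.
		 */
		if (per_cpu(meta, group) == NULL) {
			netbk->task = NULL;
			continue;
		}

		/* ... existing timer / kthread_create() / kthread_bind()
		 * setup for this group ...
		 */
	}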
> + rc = NOTIFY_BAD;
> + } else
> + rc = NOTIFY_OK;
> + break;
> + case CPU_DEAD:
> + case CPU_DEAD_FROZEN:
> + printk(KERN_INFO "xen-netback: CPU %d offline, destroying scratch space\n",
> + cpu);
> + __free_percpu_scratch_space(cpu);
> + rc = NOTIFY_OK;
> + break;
> + default:
> + break;
> + }
> +
> + return rc;
> +}
> +
> +static struct notifier_block netback_notifier_block = {
> + .notifier_call = __netback_percpu_callback,
> +};
> +
> void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
> {
> if (vif->tx.sring)
> @@ -1810,6 +1951,7 @@ static int __init netback_init(void)
> int i;
> int rc = 0;
> int group;
> + int cpu;
>
> if (!xen_domain())
> return -ENODEV;
> @@ -1821,10 +1963,21 @@ static int __init netback_init(void)
> fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
> }
>
> + for_each_online_cpu(cpu) {
> + rc = __create_percpu_scratch_space(cpu);
> + if (rc) {
> + rc = -ENOMEM;
> + goto failed_init;
> + }
> + }
> + register_hotcpu_notifier(&netback_notifier_block);
> +
> xen_netbk_group_nr = num_online_cpus();
> xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
> - if (!xen_netbk)
> - return -ENOMEM;
> + if (!xen_netbk) {
> + rc = -ENOMEM;
> + goto failed_init;
> + }
>
> for (group = 0; group < xen_netbk_group_nr; group++) {
> struct xen_netbk *netbk = &xen_netbk[group];
> @@ -1849,7 +2002,7 @@ static int __init netback_init(void)
> printk(KERN_ALERT "kthread_create() fails at netback\n");
> del_timer(&netbk->net_timer);
> rc = PTR_ERR(netbk->task);
> - goto failed_init;
> + goto failed_init_destroy_kthreads;
> }
>
> kthread_bind(netbk->task, group);
> @@ -1865,17 +2018,20 @@ static int __init netback_init(void)
>
> rc = xenvif_xenbus_init();
> if (rc)
> - goto failed_init;
> + goto failed_init_destroy_kthreads;
>
> return 0;
>
> -failed_init:
> +failed_init_destroy_kthreads:
> while (--group >= 0) {
> struct xen_netbk *netbk = &xen_netbk[group];
> del_timer(&netbk->net_timer);
> kthread_stop(netbk->task);
> }
> vfree(xen_netbk);
> +failed_init:
> + for_each_online_cpu(cpu)
> + __free_percpu_scratch_space(cpu);
> return rc;
>
> }
> @@ -1899,6 +2055,11 @@ static void __exit netback_fini(void)
> }
>
> vfree(xen_netbk);
> +
> + unregister_hotcpu_notifier(&netback_notifier_block);
> +
> + for_each_online_cpu(i)
> + __free_percpu_scratch_space(i);
> }
> module_exit(netback_fini);
>
> --
> 1.7.10.4
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel