
Re: [Xen-devel] [PATCH 2/3] xen-netback: switch to per-cpu scratch space



On Mon, May 27, 2013 at 12:29:42PM +0100, Wei Liu wrote:
> There are at most nr_online_cpus netback threads running. We can make use
> of per-cpu scratch space to reduce the size of buffer space when we move
> to the 1:1 model.
> 
> In the unlikely event that per-cpu scratch space is not available,
> processing routines will refuse to run on that CPU.
> 
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
>  drivers/net/xen-netback/netback.c |  247 ++++++++++++++++++++++++++++++-------
>  1 file changed, 204 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 54853be..0f69eda 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -37,6 +37,7 @@
>  #include <linux/kthread.h>
>  #include <linux/if_vlan.h>
>  #include <linux/udp.h>
> +#include <linux/cpu.h>
>  
>  #include <net/tcp.h>
>  
> @@ -95,6 +96,24 @@ struct netbk_rx_meta {
>  
>  #define MAX_BUFFER_OFFSET PAGE_SIZE
>  
> +/* Coalescing tx requests before copying makes number of grant
> + * copy ops greater or equal to number of slots required. In
> + * worst case a tx request consumes 2 gnttab_copy. So the size
> + * of tx_copy_ops array should be 2*MAX_PENDING_REQS.
> + */
> +#define TX_COPY_OPS_SIZE (2*MAX_PENDING_REQS)
> +DEFINE_PER_CPU(struct gnttab_copy *, tx_copy_ops);

static
> +
> +/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
> + * head/fragment page uses 2 copy operations because it
> + * straddles two buffers in the frontend. So the size of following
> + * arrays should be 2*XEN_NETIF_RX_RING_SIZE.
> + */
> +#define GRANT_COPY_OP_SIZE (2*XEN_NETIF_RX_RING_SIZE)
> +#define META_SIZE (2*XEN_NETIF_RX_RING_SIZE)
> +DEFINE_PER_CPU(struct gnttab_copy *, grant_copy_op);
> +DEFINE_PER_CPU(struct netbk_rx_meta *, meta);

static for both of them.
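I.e. something like (and the same for the tx_copy_ops one above):

        static DEFINE_PER_CPU(struct gnttab_copy *, tx_copy_ops);
        static DEFINE_PER_CPU(struct gnttab_copy *, grant_copy_op);
        static DEFINE_PER_CPU(struct netbk_rx_meta *, meta);

Nothing outside this file references them, so there is no reason to
make the symbols global.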

> +
>  struct xen_netbk {
>       wait_queue_head_t wq;
>       struct task_struct *task;
> @@ -116,21 +135,7 @@ struct xen_netbk {
>       atomic_t netfront_count;
>  
>       struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
> -     /* Coalescing tx requests before copying makes number of grant
> -      * copy ops greater or equal to number of slots required. In
> -      * worst case a tx request consumes 2 gnttab_copy.
> -      */
> -     struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
> -
>       u16 pending_ring[MAX_PENDING_REQS];
> -
> -     /*
> -      * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
> -      * head/fragment page uses 2 copy operations because it
> -      * straddles two buffers in the frontend.
> -      */
> -     struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
> -     struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
>  };
>  
>  static struct xen_netbk *xen_netbk;
> @@ -608,12 +613,31 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>       int count;
>       unsigned long offset;
>       struct skb_cb_overlay *sco;
> +     struct gnttab_copy *gco = get_cpu_var(grant_copy_op);
> +     struct netbk_rx_meta *m = get_cpu_var(meta);
> +     static int unusable_count;
>  
>       struct netrx_pending_operations npo = {
> -             .copy  = netbk->grant_copy_op,
> -             .meta  = netbk->meta,
> +             .copy = gco,
> +             .meta = m,
>       };
>  
> +     if (gco == NULL || m == NULL) {
> +             put_cpu_var(grant_copy_op);
> +             put_cpu_var(meta);
> +             if (unusable_count == 1000) {

printk_ratelimited ?

> +                     printk(KERN_ALERT
> +                            "xen-netback: "
> +                            "CPU %d scratch space is not available,"
> +                            " not doing any RX work for netback/%d\n",
> +                            smp_processor_id(),
> +                            (int)(netbk - xen_netbk));

So ... are you going to retry it? Drop it? Can you include in the message
the mechanism by which you are going to recover?
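
With printk_ratelimited() the hand-rolled unusable_count counter can go
away as well. Roughly (just a sketch - the wording of the message
depends on your answer above):

        if (gco == NULL || m == NULL) {
                put_cpu_var(grant_copy_op);
                put_cpu_var(meta);
                printk_ratelimited(KERN_ALERT
                        "xen-netback: CPU %d scratch space not available, "
                        "deferring work for netback/%d\n",
                        smp_processor_id(), (int)(netbk - xen_netbk));
                return;
        }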

> +                     unusable_count = 0;
> +             } else
> +                     unusable_count++;
> +             return;
> +     }
> +
>       skb_queue_head_init(&rxq);
>  
>       count = 0;
> @@ -635,27 +659,30 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>                       break;
>       }
>  
> -     BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
> +     BUG_ON(npo.meta_prod > META_SIZE);
>  
> -     if (!npo.copy_prod)
> +     if (!npo.copy_prod) {
> +             put_cpu_var(grant_copy_op);
> +             put_cpu_var(meta);
>               return;
> +     }
>  
> -     BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
> -     gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);
> +     BUG_ON(npo.copy_prod > GRANT_COPY_OP_SIZE);
> +     gnttab_batch_copy(gco, npo.copy_prod);
>  
>       while ((skb = __skb_dequeue(&rxq)) != NULL) {
>               sco = (struct skb_cb_overlay *)skb->cb;
>  
>               vif = netdev_priv(skb->dev);
>  
> -             if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
> +             if (m[npo.meta_cons].gso_size && vif->gso_prefix) {
>                       resp = RING_GET_RESPONSE(&vif->rx,
>                                               vif->rx.rsp_prod_pvt++);
>  
>                       resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
>  
> -                     resp->offset = netbk->meta[npo.meta_cons].gso_size;
> -                     resp->id = netbk->meta[npo.meta_cons].id;
> +                     resp->offset = m[npo.meta_cons].gso_size;
> +                     resp->id = m[npo.meta_cons].id;
>                       resp->status = sco->meta_slots_used;
>  
>                       npo.meta_cons++;
> @@ -680,12 +707,12 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>                       flags |= XEN_NETRXF_data_validated;
>  
>               offset = 0;
> -             resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
> +             resp = make_rx_response(vif, m[npo.meta_cons].id,
>                                       status, offset,
> -                                     netbk->meta[npo.meta_cons].size,
> +                                     m[npo.meta_cons].size,
>                                       flags);
>  
> -             if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
> +             if (m[npo.meta_cons].gso_size && !vif->gso_prefix) {
>                       struct xen_netif_extra_info *gso =
>                               (struct xen_netif_extra_info *)
>                               RING_GET_RESPONSE(&vif->rx,
> @@ -693,7 +720,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>  
>                       resp->flags |= XEN_NETRXF_extra_info;
>  
> -                     gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
> +                     gso->u.gso.size = m[npo.meta_cons].gso_size;
>                       gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
>                       gso->u.gso.pad = 0;
>                       gso->u.gso.features = 0;
> @@ -703,7 +730,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>               }
>  
>               netbk_add_frag_responses(vif, status,
> -                                      netbk->meta + npo.meta_cons + 1,
> +                                      m + npo.meta_cons + 1,
>                                        sco->meta_slots_used);
>  
>               RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
> @@ -726,6 +753,9 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>       if (!skb_queue_empty(&netbk->rx_queue) &&
>                       !timer_pending(&netbk->net_timer))
>               xen_netbk_kick_thread(netbk);
> +
> +     put_cpu_var(grant_copy_op);
> +     put_cpu_var(meta);
>  }
>  
>  void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
> @@ -1351,9 +1381,10 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
>       return false;
>  }
>  
> -static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> +static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk,
> +                                     struct gnttab_copy *tco)
>  {
> -     struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
> +     struct gnttab_copy *gop = tco, *request_gop;
>       struct sk_buff *skb;
>       int ret;
>  
> @@ -1531,16 +1562,17 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
>               vif->tx.req_cons = idx;
>               xen_netbk_check_rx_xenvif(vif);
>  
> -             if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
> +             if ((gop-tco) >= TX_COPY_OPS_SIZE)
>                       break;
>       }
>  
> -     return gop - netbk->tx_copy_ops;
> +     return gop - tco;
>  }
>  
> -static void xen_netbk_tx_submit(struct xen_netbk *netbk)
> +static void xen_netbk_tx_submit(struct xen_netbk *netbk,
> +                             struct gnttab_copy *tco)
>  {
> -     struct gnttab_copy *gop = netbk->tx_copy_ops;
> +     struct gnttab_copy *gop = tco;
>       struct sk_buff *skb;
>  
>       while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
> @@ -1615,15 +1647,37 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
>  static void xen_netbk_tx_action(struct xen_netbk *netbk)
>  {
>       unsigned nr_gops;
> +     struct gnttab_copy *tco;
> +     static int unusable_count;
> +
> +     tco = get_cpu_var(tx_copy_ops);
> +
> +     if (tco == NULL) {
> +             put_cpu_var(tx_copy_ops);
> +             if (unusable_count == 1000) {
> +                     printk(KERN_ALERT

Ditto. printk_ratelimited.

> +                            "xen-netback: "
> +                            "CPU %d scratch space is not available,"
> +                            " not doing any TX work for netback/%d\n",
> +                            smp_processor_id(),
> +                            (int)(netbk - xen_netbk));

And can you explain what the recovery mechanism is?

> +             } else
> +                     unusable_count++;
> +             return;
> +     }
>  
> -     nr_gops = xen_netbk_tx_build_gops(netbk);
> +     nr_gops = xen_netbk_tx_build_gops(netbk, tco);
>  
> -     if (nr_gops == 0)
> +     if (nr_gops == 0) {
> +             put_cpu_var(tx_copy_ops);
>               return;
> +     }
> +
> +     gnttab_batch_copy(tco, nr_gops);
>  
> -     gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);
> +     xen_netbk_tx_submit(netbk, tco);
>  
> -     xen_netbk_tx_submit(netbk);
> +     put_cpu_var(tx_copy_ops);
>  }
>  
>  static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
> @@ -1760,6 +1814,93 @@ static int xen_netbk_kthread(void *data)
>       return 0;
>  }
>  
> +static int __create_percpu_scratch_space(unsigned int cpu)
> +{
> +     if (per_cpu(tx_copy_ops, cpu) ||
> +         per_cpu(grant_copy_op, cpu) ||
> +         per_cpu(meta, cpu))
> +             return 0;
> +
> +     per_cpu(tx_copy_ops, cpu) =
> +             vzalloc_node(sizeof(struct gnttab_copy) * TX_COPY_OPS_SIZE,
> +                          cpu_to_node(cpu));
> +
> +     per_cpu(grant_copy_op, cpu) =
> +             vzalloc_node(sizeof(struct gnttab_copy) * GRANT_COPY_OP_SIZE,
> +                          cpu_to_node(cpu));
> +
> +     per_cpu(meta, cpu) =
> +             vzalloc_node(sizeof(struct netbk_rx_meta) * META_SIZE,
> +                          cpu_to_node(cpu));
> +
> +     if (!per_cpu(tx_copy_ops, cpu) ||
> +         !per_cpu(grant_copy_op, cpu) ||
> +         !per_cpu(meta, cpu))
> +             return -ENOMEM;

And no freeing? Ah, you require __free_percpu_scratch_space to
do the job for you. Um, why not do it here instead of depending
on the caller to clean up the mess?

Say:
                {
                        __free_percpu_scratch_space(cpu);
                        return -ENOMEM;
                }
?
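
Or spelled out in context:

        if (!per_cpu(tx_copy_ops, cpu) ||
            !per_cpu(grant_copy_op, cpu) ||
            !per_cpu(meta, cpu)) {
                __free_percpu_scratch_space(cpu);
                return -ENOMEM;
        }

__free_percpu_scratch_space() is safe to call on the partially
allocated state (vfree(NULL) is a no-op), so the caller always gets a
clean slate back.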


> +
> +     return 0;
> +}
> +
> +static void __free_percpu_scratch_space(unsigned int cpu)
> +{
> +     void *tmp;
> +
> +     tmp = per_cpu(tx_copy_ops, cpu);
> +     per_cpu(tx_copy_ops, cpu) = NULL;
> +     vfree(tmp);
> +
> +     tmp = per_cpu(grant_copy_op, cpu);
> +     per_cpu(grant_copy_op, cpu) = NULL;
> +     vfree(tmp);
> +
> +     tmp = per_cpu(meta, cpu);
> +     per_cpu(meta, cpu) = NULL;
> +     vfree(tmp);
> +}
> +
> +static int __netback_percpu_callback(struct notifier_block *nfb,
> +                                  unsigned long action, void *hcpu)
> +{
> +     unsigned int cpu = (unsigned long)hcpu;
> +     int rc = NOTIFY_DONE;
> +
> +     switch (action) {
> +     case CPU_ONLINE:
> +     case CPU_ONLINE_FROZEN:
> +             printk(KERN_INFO "xen-netback: CPU %d online, creating scratch space\n",
> +                    cpu);

I think this is more of a pr_debug type of message.

> +             rc = __create_percpu_scratch_space(cpu);
> +             if (rc) {
> +                     printk(KERN_ALERT "xen-netback: failed to create scratch space for CPU %d\n",
> +                            cpu);
> +                     /* There is really nothing more we can do. Free any
> +                      * partially allocated scratch space. When processing
> +                      * routines get to run they will just print warning
> +                      * message and stop processing.
> +                      */
> +                     __free_percpu_scratch_space(cpu);

Ugh. Could the code skip creating a kthread on a CPU for which
per_cpu(meta, cpu) == NULL?
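
For the boot-time path that could look something like this (sketch
only, assuming the group index keeps mapping 1:1 to the CPU id the way
kthread_bind() uses it today):

        for (group = 0; group < xen_netbk_group_nr; group++) {
                /* No scratch space for this CPU - a kthread bound to it
                 * could never make progress, so skip the group. */
                if (per_cpu(meta, group) == NULL)
                        continue;

                /* ... existing per-group init and kthread_create() ... */
        }

The hotplug-online failure is the messier case, since the kthread is
already running by then.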

> +                     rc = NOTIFY_BAD;
> +             } else
> +                     rc = NOTIFY_OK;
> +             break;
> +     case CPU_DEAD:
> +     case CPU_DEAD_FROZEN:
> +             printk(KERN_INFO "xen-netback: CPU %d offline, destroying scratch space\n",
> +                    cpu);
> +             __free_percpu_scratch_space(cpu);
> +             rc = NOTIFY_OK;
> +             break;
> +     default:
> +             break;
> +     }
> +
> +     return rc;
> +}
> +
> +static struct notifier_block netback_notifier_block = {
> +     .notifier_call = __netback_percpu_callback,
> +};
> +
>  void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
>  {
>       if (vif->tx.sring)
> @@ -1810,6 +1951,7 @@ static int __init netback_init(void)
>       int i;
>       int rc = 0;
>       int group;
> +     int cpu;
>  
>       if (!xen_domain())
>               return -ENODEV;
> @@ -1821,10 +1963,21 @@ static int __init netback_init(void)
>               fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
>       }
>  
> +     for_each_online_cpu(cpu) {
> +             rc = __create_percpu_scratch_space(cpu);
> +             if (rc) {
> +                     rc = -ENOMEM;
> +                     goto failed_init;
> +             }
> +     }
> +     register_hotcpu_notifier(&netback_notifier_block);
> +
>       xen_netbk_group_nr = num_online_cpus();
>       xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
> -     if (!xen_netbk)
> -             return -ENOMEM;
> +     if (!xen_netbk) {
> +             rc = -ENOMEM;
> +             goto failed_init;
> +     }
>  
>       for (group = 0; group < xen_netbk_group_nr; group++) {
>               struct xen_netbk *netbk = &xen_netbk[group];
> @@ -1849,7 +2002,7 @@ static int __init netback_init(void)
>                       printk(KERN_ALERT "kthread_create() fails at netback\n");
>                       del_timer(&netbk->net_timer);
>                       rc = PTR_ERR(netbk->task);
> -                     goto failed_init;
> +                     goto failed_init_destroy_kthreads;
>               }
>  
>               kthread_bind(netbk->task, group);
> @@ -1865,17 +2018,20 @@ static int __init netback_init(void)
>  
>       rc = xenvif_xenbus_init();
>       if (rc)
> -             goto failed_init;
> +             goto failed_init_destroy_kthreads;
>  
>       return 0;
>  
> -failed_init:
> +failed_init_destroy_kthreads:
>       while (--group >= 0) {
>               struct xen_netbk *netbk = &xen_netbk[group];
>               del_timer(&netbk->net_timer);
>               kthread_stop(netbk->task);
>       }
>       vfree(xen_netbk);
> +failed_init:
> +     for_each_online_cpu(cpu)
> +             __free_percpu_scratch_space(cpu);
>       return rc;
>  
>  }
> @@ -1899,6 +2055,11 @@ static void __exit netback_fini(void)
>       }
>  
>       vfree(xen_netbk);
> +
> +     unregister_hotcpu_notifier(&netback_notifier_block);
> +
> +     for_each_online_cpu(i)
> +             __free_percpu_scratch_space(i);
>  }
>  module_exit(netback_fini);
>  
> -- 
> 1.7.10.4
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

