[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v3 11/18] xen/pvcalls: implement accept command



On Thu, 15 Jun 2017, Juergen Gross wrote:
> On 14/06/17 21:27, Stefano Stabellini wrote:
> > On Wed, 14 Jun 2017, Juergen Gross wrote:
> >> On 14/06/17 02:47, Stefano Stabellini wrote:
> >>> On Tue, 13 Jun 2017, Juergen Gross wrote:
> >>>> On 02/06/17 21:31, Stefano Stabellini wrote:
> >>>>> Implement the accept command by calling inet_accept. To avoid blocking
> >>>>> in the kernel, call inet_accept(O_NONBLOCK) from a workqueue, which get
> >>>>> scheduled on sk_data_ready (for a passive socket, it means that there
> >>>>> are connections to accept).
> >>>>>
> >>>>> Use the reqcopy field to store the request. Accept the new socket from
> >>>>> the delayed work function, create a new sock_mapping for it, map
> >>>>> the indexes page and data ring, and reply to the other end. Allocate an
> >>>>> ioworker for the socket.
> >>>>>
> >>>>> Only support one outstanding blocking accept request for every socket at
> >>>>> any time.
> >>>>>
> >>>>> Add a field to sock_mapping to remember the passive socket from which an
> >>>>> active socket was created.
> >>>>>
> >>>>> Signed-off-by: Stefano Stabellini <stefano@xxxxxxxxxxx>
> >>>>> CC: boris.ostrovsky@xxxxxxxxxx
> >>>>> CC: jgross@xxxxxxxx
> >>>>> ---
> >>>>>  drivers/xen/pvcalls-back.c | 109 
> >>>>> ++++++++++++++++++++++++++++++++++++++++++++-
> >>>>>  1 file changed, 108 insertions(+), 1 deletion(-)
> >>>>>
> >>>>> diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
> >>>>> index a75586e..f1173f4 100644
> >>>>> --- a/drivers/xen/pvcalls-back.c
> >>>>> +++ b/drivers/xen/pvcalls-back.c
> >>>>> @@ -65,6 +65,7 @@ struct pvcalls_ioworker {
> >>>>>  struct sock_mapping {
> >>>>>         struct list_head list;
> >>>>>         struct pvcalls_fedata *priv;
> >>>>> +       struct sockpass_mapping *sockpass;
> >>>>>         struct socket *sock;
> >>>>>         uint64_t id;
> >>>>>         grant_ref_t ref;
> >>>>> @@ -275,10 +276,79 @@ static int pvcalls_back_release(struct 
> >>>>> xenbus_device *dev,
> >>>>>  
> >>>>>  static void __pvcalls_back_accept(struct work_struct *work)
> >>>>>  {
> >>>>> +       struct sockpass_mapping *mappass = container_of(
> >>>>> +               work, struct sockpass_mapping, register_work);
> >>>>> +       struct sock_mapping *map;
> >>>>> +       struct pvcalls_ioworker *iow;
> >>>>> +       struct pvcalls_fedata *priv;
> >>>>> +       struct socket *sock;
> >>>>> +       struct xen_pvcalls_response *rsp;
> >>>>> +       struct xen_pvcalls_request *req;
> >>>>> +       int notify;
> >>>>> +       int ret = -EINVAL;
> >>>>> +       unsigned long flags;
> >>>>> +
> >>>>> +       priv = mappass->priv;
> >>>>> +       /* We only need to check the value of "cmd" atomically on read. 
> >>>>> */
> >>>>> +       spin_lock_irqsave(&mappass->copy_lock, flags);
> >>>>> +       req = &mappass->reqcopy;
> >>>>> +       if (req->cmd != PVCALLS_ACCEPT) {
> >>>>> +               spin_unlock_irqrestore(&mappass->copy_lock, flags);
> >>>>> +               return;
> >>>>> +       }
> >>>>> +       spin_unlock_irqrestore(&mappass->copy_lock, flags);
> >>>>
> >>>> What about:
> >>>>  req = &mappass->reqcopy;
> >>>>  if (ACCESS_ONCE(req->cmd) != PVCALLS_ACCEPT)
> >>>>          return;
> >>>>
> >>>> I can't see the need for taking a lock here.
> >>>
> >>> Sure, good idea
> >>>
> >>>
> >>>>> +
> >>>>> +       sock = sock_alloc();
> >>>>> +       if (sock == NULL)
> >>>>> +               goto out_error;
> >>>>> +       sock->type = mappass->sock->type;
> >>>>> +       sock->ops = mappass->sock->ops;
> >>>>> +
> >>>>> +       ret = inet_accept(mappass->sock, sock, O_NONBLOCK, true);
> >>>>> +       if (ret == -EAGAIN) {
> >>>>> +               sock_release(sock);
> >>>>> +               goto out_error;
> >>>>> +       }
> >>>>> +
> >>>>> +       map = pvcalls_new_active_socket(priv,
> >>>>> +                                       req->u.accept.id_new,
> >>>>> +                                       req->u.accept.ref,
> >>>>> +                                       req->u.accept.evtchn,
> >>>>> +                                       sock);
> >>>>> +       if (!map) {
> >>>>> +               sock_release(sock);
> >>>>> +               goto out_error;
> >>>>> +       }
> >>>>> +
> >>>>> +       map->sockpass = mappass;
> >>>>> +       iow = &map->ioworker;
> >>>>> +       atomic_inc(&map->read);
> >>>>> +       atomic_inc(&map->io);
> >>>>> +       queue_work_on(iow->cpu, iow->wq, &iow->register_work);
> >>>>> +
> >>>>> +out_error:
> >>>>> +       rsp = RING_GET_RESPONSE(&priv->ring, priv->ring.rsp_prod_pvt++);
> >>>>> +       rsp->req_id = req->req_id;
> >>>>> +       rsp->cmd = req->cmd;
> >>>>> +       rsp->u.accept.id = req->u.accept.id;
> >>>>> +       rsp->ret = ret;
> >>>>> +       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&priv->ring, notify);
> >>>>> +       if (notify)
> >>>>> +               notify_remote_via_irq(priv->irq);
> >>>>> +
> >>>>> +       spin_lock_irqsave(&mappass->copy_lock, flags);
> >>>>> +       mappass->reqcopy.cmd = 0;
> >>>>> +       spin_unlock_irqrestore(&mappass->copy_lock, flags);
> >>>>
> >>>> ACCESS_ONCE(mappass->reqcopy.cmd) = 0;
> >>>
> >>> OK
> >>>
> >>>
> >>>>>  }
> >>>>>  
> >>>>>  static void pvcalls_pass_sk_data_ready(struct sock *sock)
> >>>>>  {
> >>>>> +       struct sockpass_mapping *mappass = sock->sk_user_data;
> >>>>> +
> >>>>> +       if (mappass == NULL)
> >>>>> +               return;
> >>>>> +
> >>>>> +       queue_work(mappass->wq, &mappass->register_work);
> >>>>>  }
> >>>>>  
> >>>>>  static int pvcalls_back_bind(struct xenbus_device *dev,
> >>>>> @@ -380,7 +450,44 @@ static int pvcalls_back_listen(struct 
> >>>>> xenbus_device *dev,
> >>>>>  static int pvcalls_back_accept(struct xenbus_device *dev,
> >>>>>                                struct xen_pvcalls_request *req)
> >>>>>  {
> >>>>> -       return 0;
> >>>>> +       struct pvcalls_fedata *priv;
> >>>>> +       struct sockpass_mapping *mappass;
> >>>>> +       int ret = -EINVAL;
> >>>>> +       struct xen_pvcalls_response *rsp;
> >>>>> +       unsigned long flags;
> >>>>> +
> >>>>> +       priv = dev_get_drvdata(&dev->dev);
> >>>>> +
> >>>>> +       mappass = radix_tree_lookup(&priv->socketpass_mappings,
> >>>>> +               req->u.accept.id);
> >>>>> +       if (mappass == NULL)
> >>>>> +               goto out_error;
> >>>>> +
> >>>>> +       /* 
> >>>>> +        * Limitation of the current implementation: only support one
> >>>>> +        * concurrent accept or poll call on one socket.
> >>>>> +        */
> >>>>> +       spin_lock_irqsave(&mappass->copy_lock, flags);
> >>>>> +       if (mappass->reqcopy.cmd != 0) {
> >>>>> +               spin_unlock_irqrestore(&mappass->copy_lock, flags);
> >>>>> +               ret = -EINTR;
> >>>>> +               goto out_error;
> >>>>> +       }
> >>>>> +
> >>>>> +       mappass->reqcopy = *req;
> >>>>
> >>>> This time you need the lock, however you should use:
> >>>>
> >>>> ACCESS_ONCE(mappass->reqcopy) = *req;
> >>>
> >>> I don't think that guarantees atomic accesses to the cmd field of the
> >>> struct. Shouldn't this be:
> >>>
> >>>   ACCESS_ONCE(mappass->reqcopy.cmd) = req->cmd;
> >>>   mappass->reqcopy = *req;
> >>
> >> Hmm, what if the frontend changes cmd between those two accesses?
> > 
> > This cannot happen because req is a copy of the guest request here.
> > However, it is possible that __pvcalls_back_accept is racing against
> > pvcalls_back_accept. In that case, I would need to make sure not only
> > that cmd is written atomically, but now that I am thinking about this,
> > that cmd is written *after* the rest of reqcopy: otherwise
> > __pvcalls_back_accept could see a partially updated reqcopy.
> > 
> > It would be possible to do this with atomic accesses and barriers, but
> > I am thinking that it is not worth the effort. I am tempted to roll back
> > to the previous version with spinlocks.
> 
> Okay. Maybe add a comment mentioning this possible race.

I'll do

 
> > 
> > 
> >> You either need another local buffer or you have to copy cmd via
> >> ACCESS_ONCE() and the rest of *req separately (seems not to be
> >> that hard: its just cmd, req_id and u).
> >>
> >> BTW: Maybe you should use READ_ONCE() and WRITE_ONCE() instead of
> >> ACCESS_ONCE(), as those seem to be preferred nowadays.

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.