[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/4] pvSCSI driver



> pvSCSI frontend driver
> 
> Signed-off-by: Tomonari Horikoshi <t.horikoshi@xxxxxxxxxxxxxx>
> Signed-off-by: Jun Kamada <kama@xxxxxxxxxxxxxx>

> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/drivers/xen/scsifront/scsifront.c       Fri May 30 15:07:51 2008 +0900

> +static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
> +                    vscsiif_response_t *ring_res)
> +{
> +     struct scsi_cmnd *sc;
> +     uint32_t id;
> +     uint8_t sense_len;
> +
> +     id = ring_res->rqid;
> +     sc = (struct scsi_cmnd *)info->shadow[id].req_scsi_cmnd;
> +
> +     if (sc == NULL)
> +             BUG();
> +
> +     scsifront_gnttab_done(&info->shadow[id], id);
> +     add_id_to_freelist(info, id);
> +
> +     rmb();
What's this memory barrier supposed to be protecting?  It seems like
the rmb() in scsifront_cmd_done() should be sufficient, unless I'm
missing something.

> +     if (atomic_read(&info->abort_reset_cond) == VSCSI_IN_RESET) {
> +             sc->result = (DID_RESET << 16);
> +     } else {
> +             sc->result = ring_res->rslt;
> +     }
Hmm.  If a command succeeds while a reset is in progress, you're going
to tell the guest that it failed.  I'm not sure that that's always
entirely safe.  It seems like it would be easiest to just not fiddle
with the result at all here?

> +
> +     sc->resid  = 0;
> +
> +     if (ring_res->sense_len > VSCSIIF_SENSE_BUFFERSIZE)
> +             sense_len = VSCSIIF_SENSE_BUFFERSIZE;
> +     else
> +             sense_len = ring_res->sense_len;
> +
> +     if (sense_len)
> +             memcpy(sc->sense_buffer, ring_res->sense_buffer, sense_len);
> +
> +     sc->scsi_done(sc);
> +
> +     return;
> +}


> +int scsifront_schedule(void *data)
> +{
> +     struct vscsifrnt_info *info = (struct vscsifrnt_info *)data;
> +
> +     while (!kthread_should_stop()) {
> +             wait_event_interruptible(
> +                     info->wq,
> +                     info->waiting_resp || kthread_should_stop());
> +
> +             info->waiting_resp = 0;
> +             smp_mb();
> +
> +             if (scsifront_cmd_done(info))
> +                     info->waiting_resp = 1;
> +     }
> +
> +     info->kthread = NULL;
Hmm.  Is this necessary?  It looks wrong, because you now have a
window in which the kthread is running, but the rest of the driver
can't see it.  That could lead to the driver unloading while the
thread is still running, if you're incredibly unlucky.

> +
> +     return 0;
> +}


> +/* vscsi supports only device_reset, because it is each of LUNs */
> +static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
> +{
> +     struct Scsi_Host *host = sc->device->host;
> +     struct vscsifrnt_info *info =
> +             (struct vscsifrnt_info *) sc->device->host->hostdata;
> +
> +     vscsiif_request_t *ring_req;
> +     int err;
> +
> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
> +     spin_lock_irq(host->host_lock);
> +#endif
I think it would make sense to get rid of the #if's here.  I don't
think it's terribly useful to keep this stuff working on pre-2.6.18
kernels.

> +
> +     atomic_set(&info->abort_reset_cond, VSCSI_IN_RESET);
> +     while (RING_HAS_UNCONSUMED_RESPONSES(&info->ring)) {
> +             if (!scsifront_cmd_done(info))
> +                     break;
> +     }
What about requests which are currently pending at the backend?  They
can spontaneously turn into unconsumed responses, so just because
you've seen !RING_HAS_UNCONSUMED_RESPONSES doesn't imply that it'll
still be true a little bit later.

Actually, I'm not sure I understand what this loop is trying to do at
all.  Would you mind explaining the intended logic, please?

> +
> +     ring_req      = scsifront_pre_request(info);
> +     ring_req->act = VSCSIIF_ACT_SCSI_RESET;
> +
> +     info->shadow[ring_req->rqid].act = VSCSIIF_ACT_SCSI_RESET;
> +
> +     ring_req->channel = sc->device->channel;
> +     ring_req->id      = sc->device->id;
> +     ring_req->lun     = sc->device->lun;
> +     ring_req->cmd_len = sc->cmd_len;
> +
> +     if ( sc->cmd_len )
> +             memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
> +     else
> +             memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
> +
> +     ring_req->sc_data_direction   = (uint8_t)sc->sc_data_direction;
> +     ring_req->timeout_per_command = (sc->timeout_per_command / HZ);
> +     ring_req->nr_segments         = 0;
> +
> +     spin_unlock_irq(host->host_lock);
> +     scsifront_do_request(info);     
> +     wait_event_interruptible(info->shadow[ring_req->rqid].wq_reset,
> +                      info->shadow[ring_req->rqid].wait_reset);
> +
> +     spin_lock_irq(host->host_lock);
> +
> +     err = info->shadow[ring_req->rqid].rslt_reset;
> +     atomic_set(&info->abort_reset_cond, 0);
> +
> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
> +     spin_unlock_irq(host->host_lock);
> +#endif
I'm not sure I understand why you had to re-acquire the lock here.
Nobody else should be touching your ring_req.

> +
> +     return (err);
> +}

> diff -r dced401c2d67 -r a5744f531ced drivers/xen/scsifront/xenbus.c
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/drivers/xen/scsifront/xenbus.c  Fri May 30 15:07:51 2008 +0900

> +static void scsifront_free(struct vscsifrnt_info *info)
> +{
> +     struct Scsi_Host *host = info->host;
> +
> +     if (scsi_host_get(host) != NULL)
> +             scsi_host_put(host);
What is this trying to achieve?  You get a reference and then
immediately drop it again, which seems a bit pointless.

> +
> +     if (info->ring_ref != GRANT_INVALID_REF) {
> +             gnttab_end_foreign_access(info->ring_ref,
> +                                     (unsigned long)info->ring.sring);
> +             info->ring_ref = GRANT_INVALID_REF;
> +             info->ring.sring = NULL;
> +     }
> +
> +     if (info->irq)
> +             unbind_from_irqhandler(info->irq, info);
> +     info->irq = 0;
> +}
> +
> +

> +static int scsifront_init_ring(struct vscsifrnt_info *info)
> +{
> +     struct xenbus_device *dev = info->dev;
> +     struct xenbus_transaction xbt;
> +     int err;
> +
> +     DPRINTK("%s\n",__FUNCTION__);
> +
> +     err = scsifront_alloc_ring(info);
> +     if (err)
> +             return err;
> +     DPRINTK("%u %u\n", info->ring_ref, info->evtchn);
> +
> +again:
> +     err = xenbus_transaction_start(&xbt);
> +     if (err) {
> +             xenbus_dev_fatal(dev, err, "starting transaction");
> +     }
> +
> +     err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u",
> +                             info->ring_ref);
> +     if (err) {
> +             xenbus_dev_fatal(dev, err, "%s", "writing ring-ref");
> +             goto fail;
> +     }
> +
> +     err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
> +                             irq_to_evtchn_port(info->irq));
> +
> +     if (err) {
> +             xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
> +             goto fail;
> +     }
> +
> +     err = xenbus_printf(xbt, dev->nodename, "vhostno", "%u",
> +                             info->host->host_no);
> +     if (err) {
> +             xenbus_dev_fatal(dev, err, "%s", "writing vhostno");
> +             goto fail;
> +     }
> +
> +     err = xenbus_transaction_end(xbt, 0);
> +     if (err) {
> +             if (err == -EAGAIN)
> +                     goto again;
> +             xenbus_dev_fatal(dev, err, "completing transaction");
You're going to return 0 here.  Did you mean to return err?

I think you might also want to call scsifront_free().

> +     } else
> +             xenbus_switch_state(dev, XenbusStateInitialised);
> +
> +     return 0;
> +
> +fail:
> +     xenbus_transaction_end(xbt, 1);
> +     /* free resource */
> +     scsifront_free(info);
> +     
> +     return err;
> +}
> +
> +
> +static int scsifront_probe(struct xenbus_device *dev,
> +                             const struct xenbus_device_id *id)
> +{
> +     struct vscsifrnt_info *info;
> +     struct Scsi_Host *host;
> +     int i, err = -ENOMEM;
> +     char name[DEFAULT_TASK_COMM_LEN];
> +
> +     host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
> +     if (!host) {
> +             xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
> +             return err;
> +     }
> +     info = (struct vscsifrnt_info *) host->hostdata;
> +     info->host = host;
> +
> +
> +     dev->dev.driver_data = info;
> +     info->dev  = dev;
> +
> +     for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
> +             info->shadow[i].next_free = i + 1;
> +             init_waitqueue_head(&(info->shadow[i].wq_reset));
> +             info->shadow[i].wait_reset = 0;
> +     }
> +     info->shadow[VSCSIIF_MAX_REQS - 1].next_free = 0x0fff;
> +
> +     atomic_set(&info->abort_reset_cond, 0);
> +
> +     err = scsifront_init_ring(info);
> +     if (err) {
> +             scsi_host_put(host);
> +             return err;
> +     }
> +
> +     init_waitqueue_head(&info->wq);
> +     spin_lock_init(&info->io_lock);
> +     spin_lock_init(&info->shadow_lock);
> +
> +     snprintf(name, DEFAULT_TASK_COMM_LEN, "vscsiif.%d", 
> info->host->host_no);
> +
> +     info->kthread = kthread_run(scsifront_schedule, info, name);
> +     if (IS_ERR(info->kthread)) {
> +             err = PTR_ERR(info->kthread);
> +             info->kthread = NULL;
You're going to return success here if you fail to start the kthread.
Was that deliberate?

> +     }
> +
> +     host->max_id      = VSCSIIF_MAX_TARGET;
> +     host->max_channel = 0;
> +     host->max_lun     = VSCSIIF_MAX_LUN;
> +     host->max_sectors = (VSCSIIF_SG_TABLESIZE * PAGE_SIZE / 512);
Hmm.  If Linux submits a maximally-sized request which isn't
page-aligned then you'll need VSCSIIF_SG_TABLESIZE+1 SG entries, whcih
won't fit.  I think you probably wanted to say
(VSCSIIF_SG_TABLESIZE-1)*PAGE_SIZE/512 here.

> +
> +     err = scsi_add_host(host, &dev->dev);
> +     if (err) {
> +             printk(KERN_ERR "scsifront: fail to add scsi host %d\n", err);
> +             return err;
> +     }
> +
> +     xenbus_switch_state(dev, XenbusStateInitialised);
> +
> + #if 0
> +     /* All SCSI device scan */
> +     scsi_scan_host(host);
> +
> +     err = xenbus_printf(XBT_NIL, dev->nodename, "hotplug-status", "%s",
> +                                                             "connected");
> +     if (err) {
> +             xenbus_dev_fatal(dev, err, "%s", "writing hotplug-status");
> +             return err;
> +     }
> + #endif
Was this meant to be left in?

> +     return 0;
> +}
> +

> +static int scsifront_disconnect(struct vscsifrnt_info *info)
> +{
> +     struct xenbus_device *dev = info->dev;
> +     struct Scsi_Host *host = info->host;
> +
> +     unsigned long flags;
> +
> +     DPRINTK("%s: %s disconnect\n",__FUNCTION__ ,dev->nodename);
> +
> +     spin_lock_irqsave(host->host_lock, flags);
> +     while (RING_HAS_UNCONSUMED_RESPONSES(&info->ring)) {
> +             if (!scsifront_cmd_done(info))
> +                     break;
> +     }
> +
Hmm.  You spin until there are no more unconsumed responses, but what
stops the backend turning requests into responses?  For that matter,
what stops Linux from submitting more requests?

> +     spin_unlock_irqrestore(host->host_lock, flags);
> +
> +     spin_lock(&info->io_lock);
> +
> +     scsi_remove_host(host);
> +     scsi_host_put(host);
Careful.  You're holding a spinlock, so can't reschedule, but the
first thing scsi_remove_host() does is acquire a mutex.  That isn't
really allowed.

I'm not sure what the lock is trying to protect here.  I think you
hold host references everywhere you need one, so you should be able to
do the scsi_host_put() without holding any additional locks.

Also, your info area is allocated off of the host structure, so you
shouldn't really touch it after you've put it, unless you have some
other reason to think that it's still alive.  That means you can't
release the lock here.

> +
> +     spin_unlock(&info->io_lock);
> +
> +
> +     xenbus_frontend_closed(dev);
> +
> +     return 0;
> +}


> +static void scsifront_backend_changed(struct xenbus_device *dev,
> +                             enum xenbus_state backend_state)
> +{
> +     struct vscsifrnt_info *info = dev->dev.driver_data;
> +
> +     DPRINTK("%p %u %u\n", dev, dev->state, backend_state);
> +
> +     switch (backend_state) {
> +     case XenbusStateUnknown:
> +     case XenbusStateInitialising:
> +     case XenbusStateInitWait:
> +     case XenbusStateClosed:
> +             break;
> +
> +     case XenbusStateInitialised:
> +             break;
> +
> +     case XenbusStateConnected:
> +             if (xenbus_read_driver_state(dev->nodename) ==
> +                     XenbusStateInitialised) {
> +                     scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
> +             }
> +             xenbus_switch_state(dev, XenbusStateConnected);
> +             break;
> +
> +     case XenbusStateClosing:
> +             scsifront_disconnect(info);
> +             break;
> +
> +     case XenbusStateReconfiguring:
> +             scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN);
> +             xenbus_switch_state(dev, XenbusStateReconfiguring);
> +             break;
> +
> +     case XenbusStateReconfigured:
> +             scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
> +             xenbus_switch_state(dev, XenbusStateConnected);
> +             break;
I don't understand what's going on here.  Would you mind explaining
how LUN hot add/remove is supposed to work at the xenbus level,
please?

> +     }
> +}

> +static struct xenbus_driver scsifront_driver = {
> +     .name                   = "vscsi",
> +     .owner                  = THIS_MODULE,
> +     .ids                    = scsifront_ids,
> +     .probe                  = scsifront_probe,
> +     .remove                 = scsifront_remove,
> +/*   .resume                 = scsifront_resume, */
Okay, so this version doesn't do suspend/resume or migrate?  I guess
that's reasonable while the xend support is still missing.

> +     .otherend_changed       = scsifront_backend_changed,
> +};

Attachment: signature.asc
Description: Digital signature

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.