[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Minios-devel] [UNIKRAFT PATCH 3/6] lib/devfs: Adapt imported devfs to Unikraft



Hi,

please see the comments inline

Cheers, Yuri.

"Vlad-Andrei BĂDOIU (78692)" <vlad_andrei.badoiu@xxxxxxxxxxxxxxx>
writes:

> Signed-off-by: Vlad-Andrei Badoiu <vlad_andrei.badoiu@xxxxxxxxxxxxxxx>
> ---
>  lib/devfs/devfs.h                    |   2 +-
>  lib/devfs/devfs_vnops.c              | 122 ++++++++++++---
>  lib/devfs/device.c                   | 218 +++++----------------------
>  lib/vfscore/include/vfscore/device.h |   9 +-
>  4 files changed, 141 insertions(+), 210 deletions(-)
>
> diff --git a/lib/devfs/devfs.h b/lib/devfs/devfs.h
> index bdf50609..c2197f79 100644
> --- a/lib/devfs/devfs.h
> +++ b/lib/devfs/devfs.h
> @@ -30,7 +30,7 @@
>  #ifndef _DEVFS_H
>  #define _DEVFS_H
>  
> -#include <assert.h>
> +#include <uk/assert.h>
>  
>  /* #define DEBUG_DEVFS 1 */
>  
> diff --git a/lib/devfs/devfs_vnops.c b/lib/devfs/devfs_vnops.c
> index 1e69e4a6..1ec0d553 100644
> --- a/lib/devfs/devfs_vnops.c
> +++ b/lib/devfs/devfs_vnops.c
> @@ -31,6 +31,10 @@
>   * devfs - device file system.
>   */
>  
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
>  #include <sys/stat.h>
>  
>  #include <ctype.h>
> @@ -41,27 +45,34 @@
>  #include <limits.h>
>  #include <fcntl.h>
>  #include <stdio.h>
> +#include <sys/mount.h>
> +
> +#include <dirent.h>
> +#include <vfscore/prex.h>
> +#include <vfscore/device.h>
> +#include <vfscore/vnode.h>
> +#include <vfscore/mount.h>
> +#include <vfscore/dentry.h>
> +#include <vfscore/file.h>
>  
> -#include <osv/prex.h>
> -#include <osv/device.h>
> -#include <osv/vnode.h>
> -#include <osv/mount.h>
> -#include <osv/dentry.h>
> +#include <vfscore/fs.h>
>  
>  #include "devfs.h"
>  
> +#include <uk/ctors.h>
> +
>  #ifdef DEBUG_DEVFS
> -#define DPRINTF(a)   dprintf a
> +#define DPRINTF(X)   uk_pr_debug X
>  #else
> -#define DPRINTF(a)   do {} while (0)
> +#define DPRINTF(X)
>  #endif
>  
> -#define ASSERT(e)    assert(e)
> +#define ASSERT(e)
I see you are using UK_ASSERT directly. Which is reasonable. But let's
remove this declaration in such a case.

>  
>  static uint64_t inode_count = 1; /* inode 0 is reserved to root */
>  
>  static int
> -devfs_open(struct file *fp)
> +devfs_open(struct vfscore_file *fp)
>  {
>       struct vnode *vp = fp->f_dentry->d_vnode;
>       char *path = fp->f_dentry->d_path;
> @@ -90,7 +101,7 @@ devfs_open(struct file *fp)
>  }
>  
>  static int
> -devfs_close(struct vnode *vp, struct file *fp)
> +devfs_close(struct vnode *vp, struct vfscore_file *fp)
>  {
>  
>       DPRINTF(("devfs_close: fp=%x\n", fp));
> @@ -98,27 +109,27 @@ devfs_close(struct vnode *vp, struct file *fp)
>       if (!strcmp(fp->f_dentry->d_path, "/")) /* root ? */
>               return 0;
>  
> -     return device_close((device*)vp->v_data);
> +     return device_close((struct device*)vp->v_data);
>  }
>  
>  static int
> -devfs_read(struct vnode *vp, struct file *fp, struct uio *uio, int ioflags)
> +devfs_read(struct vnode *vp, struct vfscore_file *fp, struct uio *uio, int 
> ioflags)
>  {
> -     return device_read((device*)vp->v_data, uio, ioflags);
> +     return device_read((struct device*)vp->v_data, uio, ioflags);
>  }
>  
>  static int
>  devfs_write(struct vnode *vp, struct uio *uio, int ioflags)
>  {
> -     return device_write((device*)vp->v_data, uio, ioflags);
> +     return device_write((struct device*)vp->v_data, uio, ioflags);
>  }
>  
>  static int
> -devfs_ioctl(struct vnode *vp, struct file *fp, u_long cmd, void *arg)
> +devfs_ioctl(struct vnode *vp, struct vfscore_file *fp, u_long cmd, void *arg)
>  {
>       int error;
>  
> -     error = device_ioctl((device*)vp->v_data, cmd, arg);
> +     error = device_ioctl((struct device*)vp->v_data, cmd, arg);
>       DPRINTF(("devfs_ioctl: cmd=%x\n", cmd));
>       return error;
>  }
> @@ -149,7 +160,7 @@ devfs_lookup(struct vnode *dvp, char *name, struct vnode 
> **vpp)
>                       break;
>               i++;
>       }
> -     if (vget(dvp->v_mount, inode_count++, &vp)) {
> +     if (vfscore_vget(dvp->v_mount, inode_count++, &vp)) {
>               /* found in cache */
>               *vpp = vp;
>               return 0;
> @@ -171,7 +182,7 @@ devfs_lookup(struct vnode *dvp, char *name, struct vnode 
> **vpp)
>   * @vp: vnode of the directory.
>   */
>  static int
> -devfs_readdir(struct vnode *vp, struct file *fp, struct dirent *dir)
> +devfs_readdir(struct vnode *vp, struct vfscore_file *fp, struct dirent *dir)
>  {
>       struct devinfo info;
>       int error, i;
> @@ -204,15 +215,10 @@ devfs_readdir(struct vnode *vp, struct file *fp, struct 
> dirent *dir)
>  static int
>  devfs_unmount(struct mount *mp, int flags)
>  {
> -     release_mp_dentries(mp);
> +     //release_mp_dentries(mp);
1) I believe we need this call.

2) Until now we used c-style comments "/* blah */" rather then c++
   "//blah".

3) Better not to leave code commented if possible. Either use it or
   deleted it. Unless you know that most likely it will needed later.

>       return 0;
>  }
>  
> -int
> -devfs_init(void)
> -{
> -     return 0;
> -}
>  
>  static int
>  devfs_getattr(struct vnode *vnode, struct vattr *attr)
> @@ -222,6 +228,39 @@ devfs_getattr(struct vnode *vnode, struct vattr *attr)
>       return 0;
>  }
>  
> +int
> +vop_nullop(void)
> +{
> +     return 0;
> +}
> +
> +int
> +vop_einval(void)
> +{
> +     return EINVAL;
> +}
> +
> +int
> +vop_eperm(void)
> +{
> +     return EPERM;
> +}
> +
> +int
> +vop_erofs(void)
> +{
> +                 return EROFS;
> +}
> +
> +int
> +vfs_nullop(void)
> +{
> +                 return 0;
> +}
1) Vfscore provides vfscore_nullop and vfscore_vop_nullop for these
   purposes.

2) I know you are fixing checpatch issues later, but at least the new
   code you are adding in this patch should comply with coding
   standards. To not to make the checpatch fixes to grow where it is not
   necessary
> +
> +
> +
> +
>  #define devfs_mount  ((vfsop_mount_t)vfs_nullop)
>  #define devfs_sync   ((vfsop_sync_t)vfs_nullop)
>  #define devfs_vget   ((vfsop_vget_t)vfs_nullop)
> @@ -265,7 +304,7 @@ struct vnops devfs_vnops = {
>       devfs_inactive,         /* inactive */
>       devfs_truncate,         /* truncate */
>       devfs_link,             /* link */
> -     (vnop_cache_t) nullptr, /* arc */
> +     (vnop_cache_t) NULL, /* arc */
>       devfs_fallocate,        /* fallocate */
>       devfs_readlink,         /* read link */
>       devfs_symlink,          /* symbolic link */
> @@ -282,3 +321,36 @@ struct vfsops devfs_vfsops = {
>       devfs_statfs,           /* statfs */
>       &devfs_vnops,           /* vnops */
>  };
> +
> +static struct vfscore_fs_type fs_devfs = {
> +    .vs_name = "devfs",
> +    .vs_init = NULL,
> +    .vs_op = &devfs_vfsops,
> +};
> +
> +UK_FS_REGISTER(fs_devfs);
> +
> +
> +__constructor_prio(101) static void devfs_init(void)
> +{
> +     int ret;
> +   
> +     ret = mount("", "/", "ramfs", 0, NULL);
> +     if (ret != 0) {
> +             DPRINTF(("Failed to mount / in %s\n", __func__));
> +             return;
> +     }
This means devfs depends on ramfs.

> +
> +     ret =  mkdir("/dev", S_IRWXU);
> +     if (ret != 0) {
> +             DPRINTF(("Failed to mkdir /dev in %s\n", __func__));
> +             return;
> +     }
> +
> +     ret = mount("", "/dev", "devfs", 0, NULL);
> +     if (ret != 0) {
> +             DPRINTF(("Failed to mount /dev as devfs in %s\n", __func__));
> +             return;
> +     }
> +
> +}
> diff --git a/lib/devfs/device.c b/lib/devfs/device.c
> index a0cd9768..50522076 100644
> --- a/lib/devfs/device.c
> +++ b/lib/devfs/device.c
> @@ -45,21 +45,15 @@
>  #include <stdio.h>
>  #include <unistd.h>
>  #include <errno.h>
> -#include <assert.h>
> +#include <uk/assert.h>
>  #include <string.h>
>  
> -#include <osv/prex.h>
> -#include <osv/mutex.h>
> -#include <osv/device.h>
> -#include <osv/debug.h>
> -#include <osv/buf.h>
> -
> -#include <geom/geom_disk.h>
> -
> -mutex sched_mutex;
> -#define sched_lock() sched_mutex.lock()
> -#define sched_unlock()       sched_mutex.unlock()
> +#include <vfscore/device.h>
> +#include <vfscore/prex.h>
> +#include <uk/essentials.h>
> +#include <uk/mutex.h>
>  
> +static struct uk_mutex devfs_lock = UK_MUTEX_INITIALIZER(devfs_lock);
>  
>  /* list head of the devices */
>  static struct device *device_list = NULL;
> @@ -92,85 +86,31 @@ struct partition_table_entry {
>       uint32_t total_sectors;
>  } __attribute__((packed));
>  
> -/*
> - * read_partition_table - given a device @dev, create one subdevice per 
> partition
> - * found in that device.
> - *
> - * This function will read a partition table from the canonical location of 
> the
> - * device pointed by @dev. For each partition found, a new device will be
> - * created. The newly created device will have most of its data copied from
> - * @dev, except for its name, offset and size.
> - */
> -void read_partition_table(struct device *dev)
> -{
> -     struct buf *bp;
> -     unsigned long offset;
> -     int index;
> -
> -     if (bread(dev, 0, &bp) != 0) {
> -             debugf("read_partition_table failed for %s\n", dev->name);
> -             return;
> -     }
> -
> -     sched_lock();
> -     // A real partition table (MBR) ends in the two bytes 0x55, 0xAA (see
> -     // arch/x64/boot16.S on where we put those on the OSv image). If we
> -     // don't find those, this is an unpartitioned disk.
> -     if (((unsigned char*)bp->b_data)[510] == 0x55 &&
> -         ((unsigned char*)bp->b_data)[511] == 0xAA)
> -     for (offset = 0x1be, index = 0; offset < 0x1fe; offset += 0x10, 
> index++) {
> -             char dev_name[MAXDEVNAME];
> -             struct device *new_dev;
> -
> -             auto* entry = static_cast<struct 
> partition_table_entry*>(bp->b_data + offset);
> -
> -             if (entry->system_id == 0) {
> -                     continue;
> -             }
> -
> -             if (entry->starting_sector == 0) {
> -                     continue;
> -             }
> -
> -             snprintf(dev_name, MAXDEVNAME, "%s.%d", dev->name, index);
> -             new_dev = device_create(dev->driver, dev_name, dev->flags);
> -             free(new_dev->private_data);
> -
> -             new_dev->offset = (off_t)entry->rela_sector << 9;
> -             new_dev->size = (off_t)entry->total_sectors << 9;
> -             new_dev->max_io_size = dev->max_io_size;
> -             new_dev->private_data = dev->private_data;
> -             device_set_softc(new_dev, device_get_softc(dev));
> -     }
> -
> -     sched_unlock();
> -     brelse(bp);
> -}
>  
>  void device_register(struct device *dev, const char *name, int flags)
>  {
>       size_t len;
>       void *priv = NULL;
> -
> -     assert(dev->driver != NULL);
> +     
> +     UK_ASSERT(dev->driver != NULL);
>  
>       /* Check the length of name. */
>       len = strnlen(name, MAXDEVNAME);
>       if (len == 0 || len >= MAXDEVNAME)
>               return;
>  
> -     sched_lock();
> +     uk_mutex_lock(&devfs_lock);
>  
>       /* Check if specified name is already used. */
> -     if (device_lookup(name) != NULL)
> -             sys_panic("duplicate device");
> +     //if (device_lookup(name) != NULL)
> +             //sys_panic("duplicate device");
Looks like this was a reasonable check
>  
>       /*
>        * Allocate a device and device private data.
>        */
>       if (dev->driver->devsz != 0) {
>               if ((priv = malloc(dev->driver->devsz)) == NULL)
> -                     sys_panic("devsz");
> +                     UK_CRASH("devsz");
>               memset(priv, 0, dev->driver->devsz);
>       }
>  
> @@ -183,8 +123,8 @@ void device_register(struct device *dev, const char 
> *name, int flags)
>       dev->next = device_list;
>       dev->max_io_size = UINT_MAX;
>       device_list = dev;
> -
> -     sched_unlock();
> +     
> +     uk_mutex_unlock(&devfs_lock);   
>  }
>  
>  
> @@ -201,7 +141,7 @@ device_create(struct driver *drv, const char *name, int 
> flags)
>       struct device *dev;
>       size_t len;
>  
> -     assert(drv != NULL);
> +     UK_ASSERT(drv != NULL);
>  
>       /* Check the length of name. */
>       len = strnlen(name, MAXDEVNAME);
> @@ -211,27 +151,14 @@ device_create(struct driver *drv, const char *name, int 
> flags)
>       /*
>        * Allocate a device structure.
>        */
> -     if ((dev = new device) == NULL)
> -             sys_panic("device_create");
> +     if ((dev = malloc(sizeof(struct device))) == NULL)
> +             UK_CRASH("device_create");
>  
>      dev->driver = drv;
>      device_register(dev, name, flags);
>       return dev;
>  }
>  
> -#if 0
> -/*
> - * Return device's private data.
> - */
> -static void *
> -device_private(struct device *dev)
> -{
> -     assert(dev != NULL);
> -     assert(dev->private_data != NULL);
> -
> -     return dev->private_data;
> -}
> -#endif
>  
>  /*
>   * Return true if specified device is valid.
> @@ -260,13 +187,13 @@ static int
>  device_reference(struct device *dev)
>  {
>  
> -     sched_lock();
> +     uk_mutex_lock(&devfs_lock);
>       if (!device_valid(dev)) {
> -             sched_unlock();
> +             uk_mutex_unlock(&devfs_lock);
>               return ENODEV;
>       }
>       dev->refcnt++;
> -     sched_unlock();
> +     uk_mutex_unlock(&devfs_lock);
>       return 0;
>  }
>  
> @@ -279,9 +206,10 @@ device_release(struct device *dev)
>  {
>       struct device **tmp;
>  
> -     sched_lock();
> +     uk_mutex_lock(&devfs_lock);
> +
>       if (--dev->refcnt > 0) {
> -             sched_unlock();
> +             uk_mutex_unlock(&devfs_lock);
>               return;
>       }
>       /*
> @@ -293,22 +221,22 @@ device_release(struct device *dev)
>                       break;
>               }
>       }
> -     delete dev;
> -     sched_unlock();
> +     free(dev);
> +     uk_mutex_unlock(&devfs_lock);
>  }
>  
>  int
>  device_destroy(struct device *dev)
>  {
>  
> -     sched_lock();
> +     uk_mutex_lock(&devfs_lock);
>       if (!device_valid(dev)) {
> -             sched_unlock();
> +             uk_mutex_unlock(&devfs_lock);
>               return ENODEV;
>       }
>       dev->active = 0;
> +     uk_mutex_unlock(&devfs_lock);
>       device_release(dev);
> -     sched_unlock();
>       return 0;
>  }
>  
> @@ -329,20 +257,20 @@ device_open(const char *name, int mode, struct device 
> **devp)
>       struct device *dev;
>       int error;
>  
> -     sched_lock();
> +     uk_mutex_lock(&devfs_lock);
>       if ((dev = device_lookup(name)) == NULL) {
> -             sched_unlock();
> +             uk_mutex_unlock(&devfs_lock);
>               return ENXIO;
>       }
> +     uk_mutex_unlock(&devfs_lock);
> +
This introduces a possible race condition while destroying a
device. Yeah, we probably will never destroy a device, but if we do,
this could cost hours of debugging.

Because n this case doing it properly costs very little effort, I would
suggest to add a function "device_reference_locked" with
"UK_ASSERT(uk_mutex_is_locked(&devfs_locked))" inside.


>       error = device_reference(dev);
>       if (error) {
> -             sched_unlock();
>               return error;
>       }
> -     sched_unlock();
>  
>       ops = dev->driver->devops;
> -     assert(ops->open != NULL);
> +     UK_ASSERT(ops->open != NULL);
>       error = (*ops->open)(dev, mode);
>       *devp = dev;
>  
> @@ -366,7 +294,7 @@ device_close(struct device *dev)
>               return error;
>  
>       ops = dev->driver->devops;
> -     assert(ops->close != NULL);
> +     UK_ASSERT(ops->close != NULL);
>       error = (*ops->close)(dev);
>  
>       device_release(dev);
> @@ -383,7 +311,7 @@ device_read(struct device *dev, struct uio *uio, int 
> ioflags)
>               return error;
>  
>       ops = dev->driver->devops;
> -     assert(ops->read != NULL);
> +     UK_ASSERT(ops->read != NULL);
>       error = (*ops->read)(dev, uio, ioflags);
>  
>       device_release(dev);
> @@ -400,7 +328,7 @@ device_write(struct device *dev, struct uio *uio, int 
> ioflags)
>               return error;
>  
>       ops = dev->driver->devops;
> -     assert(ops->write != NULL);
> +     UK_ASSERT(ops->write != NULL);
>       error = (*ops->write)(dev, uio, ioflags);
>  
>       device_release(dev);
> @@ -424,80 +352,13 @@ device_ioctl(struct device *dev, u_long cmd, void *arg)
>               return error;
>  
>       ops = dev->driver->devops;
> -     assert(ops->ioctl != NULL);
> +     UK_ASSERT(ops->ioctl != NULL);
>       error = (*ops->ioctl)(dev, cmd, arg);
>  
>       device_release(dev);
>       return error;
>  }
>  
> -#if 0
> -/*
> - * Device control - devctl is similar to ioctl, but is invoked from
> - * other device driver rather than from user application.
> - */
> -static int
> -device_control(struct device *dev, u_long cmd, void *arg)
> -{
> -     struct devops *ops;
> -     int error;
> -
> -     assert(dev != NULL);
> -
> -     sched_lock();
> -     ops = dev->driver->devops;
> -     assert(ops->devctl != NULL);
> -     error = (*ops->devctl)(dev, cmd, arg);
> -     sched_unlock();
> -     return error;
> -}
> -
> -/*
> - * device_broadcast - broadcast devctl command to all device objects.
> - *
> - * If "force" argument is true, we will continue command
> - * notification even if some driver returns an error. In this
> - * case, this routine returns EIO error if at least one driver
> - * returns an error.
> - *
> - * If force argument is false, a kernel stops the command processing
> - * when at least one driver returns an error. In this case,
> - * device_broadcast will return the error code which is returned
> - * by the driver.
> - */
> -static int
> -device_broadcast(u_long cmd, void *arg, int force)
> -{
> -     struct device *dev;
> -     struct devops *ops;
> -     int error, retval = 0;
> -
> -     sched_lock();
> -
> -     for (dev = device_list; dev != NULL; dev = dev->next) {
> -             /*
> -              * Call driver's devctl() routine.
> -              */
> -             ops = dev->driver->devops;
> -             if (ops == NULL)
> -                     continue;
> -
> -             assert(ops->devctl != NULL);
> -             error = (*ops->devctl)(dev, cmd, arg);
> -             if (error) {
> -                     if (force)
> -                             retval = EIO;
> -                     else {
> -                             retval = error;
> -                             break;
> -                     }
> -             }
> -     }
> -     sched_unlock();
> -     return retval;
> -}
> -#endif
> -
>  /*
>   * Return device information.
>   */
> @@ -509,7 +370,7 @@ device_info(struct devinfo *info)
>       struct device *dev;
>       int error = ESRCH;
>  
> -     sched_lock();
> +     uk_mutex_lock(&devfs_lock);
>       for (dev = device_list; dev != NULL; dev = dev->next) {
>               if (i++ == target) {
>                       info->cookie = i;
> @@ -520,7 +381,7 @@ device_info(struct devinfo *info)
>                       break;
>               }
>       }
> -     sched_unlock();
> +     uk_mutex_unlock(&devfs_lock);
>       return error;
>  }
>  
> @@ -535,3 +396,4 @@ nullop(void)
>  {
>       return 0;
>  }
> +
> diff --git a/lib/vfscore/include/vfscore/device.h 
> b/lib/vfscore/include/vfscore/device.h
> index 16d2e470..bcb33c33 100644
> --- a/lib/vfscore/include/vfscore/device.h
> +++ b/lib/vfscore/include/vfscore/device.h
> @@ -30,16 +30,15 @@
>  #ifndef _DEVICE_H
>  #define _DEVICE_H
>  
> -#include <sys/cdefs.h>
>  #include <sys/types.h>
>  
> -#include <osv/uio.h>
> -
> -__BEGIN_DECLS
> +#include <vfscore/uio.h>
>  
>  #define MAXDEVNAME   12
>  #define DO_RWMASK    0x3
>  
> +typedef unsigned long u_long;
> +
Let's avoid this typedef. Could you please replace u_long with 'unsigned
long' in the code?

>  struct bio;
>  struct device;
>  
> @@ -208,6 +207,4 @@ int device_destroy(struct device *dev);
>  void device_register(struct device *device, const char *name, int flags);
>  void read_partition_table(struct device *device);
>  
> -__END_DECLS
> -
>  #endif /* !_DEVICE_H */
> -- 
> 2.20.1
>

-- 
Yuri Volchkov
Software Specialist

NEC Europe Ltd
Kurfürsten-Anlage 36
D-69115 Heidelberg

_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.