[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH V9 3/8] Introduce HostPCIDevice to access a pci device on the host.



On Wed, Mar 21, 2012 at 06:29:00PM +0000, Anthony PERARD wrote:
> Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
> Acked-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>

So this interface is really LinuxSysfsPCIDevice.
For example the assumption that you can just open
device by pci address is broken with vfio.
Domain number is also not something anyone
besides linux knows about.

If I were you I would just call it xen- ....
and if it comes in handy it can be later renamed.

> ---
>  Makefile.target      |    3 +
>  hw/host-pci-device.c |  278 
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/host-pci-device.h |   75 ++++++++++++++
>  3 files changed, 356 insertions(+), 0 deletions(-)
>  create mode 100644 hw/host-pci-device.c
>  create mode 100644 hw/host-pci-device.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index 63cf769..0ccfd5b 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -232,6 +232,9 @@ obj-$(CONFIG_NO_XEN) += xen-stub.o
>  
>  obj-i386-$(CONFIG_XEN) += xen_platform.o
>  
> +# Xen PCI Passthrough
> +obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += host-pci-device.o
> +
>  # Inter-VM PCI shared memory
>  CONFIG_IVSHMEM =
>  ifeq ($(CONFIG_KVM), y)
> diff --git a/hw/host-pci-device.c b/hw/host-pci-device.c
> new file mode 100644
> index 0000000..3dacb30
> --- /dev/null
> +++ b/hw/host-pci-device.c
> @@ -0,0 +1,278 @@
> +/*
> + * Copyright (C) 2011       Citrix Ltd.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include "host-pci-device.h"
> +
> +#define PCI_MAX_EXT_CAP \
> +    ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + 4))

namespace pollution.
name all things HOST_PCI_....

in this case, open-coding will make things clearer.


> +
> +enum error_code {

seems unused. So why name the type?

> +    ERROR_SYNTAX = 1,

We return -1 on error, just do that and you won't need ERROR_SYNTAX.

> +};
> +
> +static int path_to(const HostPCIDevice *d,
> +                   const char *name, char *buf, ssize_t size)
> +{
> +    return snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%x/%s",
> +                    d->domain, d->bus, d->dev, d->func, name);
> +}

users ignore return value. Also, want to check no overflow
and assert?

> +
> +static int get_resource(HostPCIDevice *d)
> +{
> +    int i, rc = 0;
> +    FILE *f;
> +    char path[PATH_MAX];
> +    unsigned long long start, end, flags, size;
> +
> +    path_to(d, "resource", path, sizeof (path));

I think this might not fit, snprintf needs an extra byte for \0.

> +    f = fopen(path, "r");
> +    if (!f) {
> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
> +        return -errno;
> +    }
> +
> +    for (i = 0; i < PCI_NUM_REGIONS; i++) {
> +        if (fscanf(f, "%llx %llx %llx", &start, &end, &flags) != 3) {

People mentioned that scanf is not a good way to parse input.
Applies here.

> +            fprintf(stderr, "Error: Syntax error in %s\n", path);
> +            rc = ERROR_SYNTAX;
> +            break;
> +        }
> +        if (start) {
> +            size = end - start + 1;
> +        } else {
> +            size = 0;
> +        }
> +
> +        if (i < PCI_ROM_SLOT) {
> +            d->io_regions[i].base_addr = start;
> +            d->io_regions[i].size = size;
> +            d->io_regions[i].flags = flags;
> +        } else {
> +            d->rom.base_addr = start;
> +            d->rom.size = size;
> +            d->rom.flags = flags;
> +        }
> +    }
> +
> +    fclose(f);
> +    return rc;
> +}
> +
> +static int get_hex_value(HostPCIDevice *d, const char *name,
> +                         unsigned long *pvalue)

why long?

> +{
> +    char path[PATH_MAX];
> +    FILE *f;
> +    unsigned long value;
> +
> +    path_to(d, name, path, sizeof (path));
> +    f = fopen(path, "r");
> +    if (!f) {
> +        fprintf(stderr, "Error: Can't open %s: %s\n", path, strerror(errno));
> +        return -errno;
> +    }
> +    if (fscanf(f, "%lx\n", &value) != 1) {
> +        fprintf(stderr, "Error: Syntax error in %s\n", path);
> +        fclose(f);
> +        return ERROR_SYNTAX;
> +    }
> +    fclose(f);
> +    *pvalue = value;
> +    return 0;
> +}
> +
> +static bool pci_dev_is_virtfn(HostPCIDevice *d)
> +{
> +    char path[PATH_MAX];
> +    struct stat buf;
> +
> +    path_to(d, "physfn", path, sizeof (path));
> +    return !stat(path, &buf);
> +}
> +

Don't start names with pci_.
It would also be better to avoid things like path_to IMO.

> +static int host_pci_config_fd(HostPCIDevice *d)

So this opens if needed, and returns.
Why not explicitly open on get?
then you won't need these hacks.

> +{
> +    char path[PATH_MAX];
> +
> +    if (d->config_fd < 0) {
> +        path_to(d, "config", path, sizeof (path));

sizeof path

> +        d->config_fd = open(path, O_RDWR);
> +        if (d->config_fd < 0) {
> +            fprintf(stderr, "HostPCIDevice: Can not open '%s': %s\n",
> +                    path, strerror(errno));

strerror is not thread safe

> +        }
> +    }
> +    return d->config_fd;
> +}
> +static int host_pci_config_read(HostPCIDevice *d, int pos, void *buf, int 
> len)
> +{
> +    int fd = host_pci_config_fd(d);

You open file on each access?

> +    int res = 0;

why initialize here?

> +
> +again:
> +    res = pread(fd, buf, len, pos);
> +    if (res != len) {
> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;

code loops with while or for.

> +        }
> +        fprintf(stderr, "%s: read failed: %s (fd: %i)\n",
> +                __func__, strerror(errno), fd);
> +        return -errno;
> +    }
> +    return 0;
> +}
> +static int host_pci_config_write(HostPCIDevice *d,
> +                                 int pos, const void *buf, int len)
> +{
> +    int fd = host_pci_config_fd(d);
> +    int res = 0;
> +
> +again:
> +    res = pwrite(fd, buf, len, pos);
> +    if (res != len) {
> +        if (res < 0 && (errno == EINTR || errno == EAGAIN)) {
> +            goto again;
> +        }
> +        fprintf(stderr, "%s: write failed: %s\n",
> +                __func__, strerror(errno));
> +        return -errno;
> +    }
> +    return 0;
> +}
> +

same comments as above. also,
Don't report errors with fprintf.

> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p)
> +{
> +    uint8_t buf;
> +    int rc = host_pci_config_read(d, pos, &buf, 1);
> +    if (rc == 0) {

!rc.

> +        *p = buf;

why not pass in p directly?

> +    }
> +    return rc;
> +}
> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p)
> +{
> +    uint16_t buf;
> +    int rc = host_pci_config_read(d, pos, &buf, 2);
> +    if (rc == 0) {

!rc.

> +        *p = le16_to_cpu(buf);
> +    }
> +    return rc;
> +}

This looks wrong wrt endian-ness.

> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p)
> +{
> +    uint32_t buf;
> +    int rc = host_pci_config_read(d, pos, &buf, 4);
> +    if (rc == 0) {
> +        *p = le32_to_cpu(buf);
> +    }
> +    return rc;
> +}


Add empty lines between {}

> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
> +{
> +    return host_pci_config_read(d, pos, buf, len);
> +}

when would this be useful?

> +
> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data)
> +{
> +    return host_pci_config_write(d, pos, &data, 1);
> +}
> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data)
> +{
> +    data = cpu_to_le16(data);
> +    return host_pci_config_write(d, pos, &data, 2);
> +}
> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data)
> +{
> +    data = cpu_to_le32(data);
> +    return host_pci_config_write(d, pos, &data, 4);
> +}
> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len)
> +{
> +    return host_pci_config_write(d, pos, buf, len);
> +}
> +
> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *d, uint32_t cap)

Why 32? Ext config offsets are < 12 bit.

> +{
> +    uint32_t header = 0;
> +    int max_cap = PCI_MAX_EXT_CAP;
> +    int pos = PCI_CONFIG_SPACE_SIZE;
> +
> +    do {
> +        if (host_pci_get_long(d, pos, &header)) {
> +            break;
> +        }
> +        /*
> +         * If we have no capabilities, this is indicated by cap ID,
> +         * cap version and next pointer all being 0.
> +         */
> +        if (header == 0) {
> +            break;
> +        }
> +
> +        if (PCI_EXT_CAP_ID(header) == cap) {
> +            return pos;
> +        }
> +
> +        pos = PCI_EXT_CAP_NEXT(header);
> +        if (pos < PCI_CONFIG_SPACE_SIZE) {
> +            break;
> +        }
> +
> +        max_cap--;
> +    } while (max_cap > 0);
> +
> +    return 0;
> +}
> +
> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func)

Why skip domain in the interface?
Also, HostPCIDevice structure is public so there is little value
in allocating, just get it by pointer and init/cleanup.


> +{
> +    HostPCIDevice *d = NULL;
> +    unsigned long v = 0;
> +
> +    d = g_new0(HostPCIDevice, 1);
> +
> +    d->config_fd = -1;
> +    d->domain = 0;
> +    d->bus = bus;
> +    d->dev = dev;
> +    d->func = func;
> +
> +    if (host_pci_config_fd(d) == -1) {
> +        goto error;
> +    }
> +    if (get_resource(d) != 0) {

just get_resource(d).

> +        goto error;
> +    }
> +
> +    if (get_hex_value(d, "vendor", &v)) {
> +        goto error;
> +    }
> +    d->vendor_id = v;
> +    if (get_hex_value(d, "device", &v)) {
> +        goto error;
> +    }
> +    d->device_id = v;
> +    d->is_virtfn = pci_dev_is_virtfn(d);
> +
> +    return d;
> +error:
> +    if (d->config_fd >= 0) {
> +        close(d->config_fd);
> +    }
> +    g_free(d);
> +    return NULL;
> +}
> +
> +void host_pci_device_put(HostPCIDevice *d)
> +{
> +    if (d->config_fd >= 0) {
> +        close(d->config_fd);
> +    }
> +    g_free(d);
> +}
> diff --git a/hw/host-pci-device.h b/hw/host-pci-device.h
> new file mode 100644
> index 0000000..c8880eb
> --- /dev/null
> +++ b/hw/host-pci-device.h
> @@ -0,0 +1,75 @@
> +#ifndef HW_HOST_PCI_DEVICE
> +#  define HW_HOST_PCI_DEVICE

Don't put space after #.

Also HOST_PCI_DEVICE_H would be less likely to confuse.

> +
> +#include "pci.h"
> +
> +/*
> + * from linux/ioport.h
> + * IO resources have these defined flags.
> + */
> +#define IORESOURCE_BITS         0x000000ff      /* Bus-specific bits */
> +
> +#define IORESOURCE_TYPE_BITS    0x00000f00      /* Resource type */
> +#define IORESOURCE_IO           0x00000100
> +#define IORESOURCE_MEM          0x00000200
> +#define IORESOURCE_IRQ          0x00000400
> +#define IORESOURCE_DMA          0x00000800
> +
> +#define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
> +#define IORESOURCE_READONLY     0x00002000
> +#define IORESOURCE_CACHEABLE    0x00004000
> +#define IORESOURCE_RANGELENGTH  0x00008000
> +#define IORESOURCE_SHADOWABLE   0x00010000
> +
> +#define IORESOURCE_SIZEALIGN    0x00020000      /* size indicates alignment 
> */
> +#define IORESOURCE_STARTALIGN   0x00040000      /* start field is alignment 
> */
> +
> +#define IORESOURCE_MEM_64       0x00100000
> +
> +    /* Userland may not map this resource */
> +#define IORESOURCE_EXCLUSIVE    0x08000000
> +#define IORESOURCE_DISABLED     0x10000000
> +#define IORESOURCE_UNSET        0x20000000
> +#define IORESOURCE_AUTO         0x40000000
> +    /* Driver has marked this resource busy */
> +#define IORESOURCE_BUSY         0x80000000
> +

Why do above make sense in an API?
Abstract it in some reasonable way, don't just expose
flags from sysfs as is.

> +

kill extra empty lines

> +typedef struct HostPCIIORegion {
> +    unsigned long flags;
> +    pcibus_t base_addr;
> +    pcibus_t size;
> +} HostPCIIORegion;
> +
> +typedef struct HostPCIDevice {
> +    uint16_t domain;
> +    uint8_t bus;
> +    uint8_t dev;
> +    uint8_t func;
> +
> +    uint16_t vendor_id;
> +    uint16_t device_id;
> +
> +    HostPCIIORegion io_regions[PCI_NUM_REGIONS - 1];
> +    HostPCIIORegion rom;
> +
> +    bool is_virtfn;
> +
> +    int config_fd;
> +} HostPCIDevice;
> +
> +HostPCIDevice *host_pci_device_get(uint8_t bus, uint8_t dev, uint8_t func);
> +void host_pci_device_put(HostPCIDevice *pci_dev);
> +
> +int host_pci_get_byte(HostPCIDevice *d, int pos, uint8_t *p);
> +int host_pci_get_word(HostPCIDevice *d, int pos, uint16_t *p);
> +int host_pci_get_long(HostPCIDevice *d, int pos, uint32_t *p);
> +int host_pci_get_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
> +int host_pci_set_byte(HostPCIDevice *d, int pos, uint8_t data);
> +int host_pci_set_word(HostPCIDevice *d, int pos, uint16_t data);
> +int host_pci_set_long(HostPCIDevice *d, int pos, uint32_t data);
> +int host_pci_set_block(HostPCIDevice *d, int pos, uint8_t *buf, int len);
> +
> +uint32_t host_pci_find_ext_cap_offset(HostPCIDevice *s, uint32_t cap);
> +
> +#endif /* !HW_HOST_PCI_DEVICE */
> -- 
> Anthony PERARD

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.