[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v3 1/2] libxl: Implement the handler to handle unrecoverable AER errors



On Mon, Aug 07, 2017 at 06:54:56PM -0500, Venu Busireddy wrote:
> Implement the callback function to handle unrecoverable AER errors, and
> also the public APIs that can be used to register/unregister the handler.
> When an AER error occurs, the handler will forcibly remove the erring
> PCIe device from the guest.
> 
> Signed-off-by: Venu Busireddy <venu.busireddy@xxxxxxxxxx>
> ---
>  tools/libxl/libxl.h          | 14 +++++++
>  tools/libxl/libxl_event.h    | 13 +++++++
>  tools/libxl/libxl_internal.h |  7 ++++
>  tools/libxl/libxl_pci.c      | 90 
> ++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 124 insertions(+)
> 
> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
> index 7cf0f31..c5af0aa 100644
> --- a/tools/libxl/libxl.h
> +++ b/tools/libxl/libxl.h
> @@ -1044,6 +1044,20 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, 
> const libxl_mac *src);
>   */
>  #define LIBXL_HAVE_QED 1
>  
> +/* LIBXL_HAVE_REG_AER_EVENTS_HANDLER
> + *
> + * If it is defined, libxl has a library function called
> + * libxl_reg_aer_events_handler.
> + */
> +#define LIBXL_HAVE_REG_AER_EVENTS_HANDLER 1
> +
> +/* LIBXL_HAVE_UNREG_AER_EVENTS_HANDLER
> + *
> + * If it is defined, libxl has a library function called
> + * libxl_unreg_aer_events_handler.
> + */
> +#define LIBXL_HAVE_UNREG_AER_EVENTS_HANDLER 1
> +

You can consolidate both into

LIBLX_HAVE_AER_EVENTS_HANDLER

>  typedef char **libxl_string_list;
>  void libxl_string_list_dispose(libxl_string_list *sl);
>  int libxl_string_list_length(const libxl_string_list *sl);
> diff --git a/tools/libxl/libxl_event.h b/tools/libxl/libxl_event.h
> index 1ea789e..1aea906 100644
> --- a/tools/libxl/libxl_event.h
> +++ b/tools/libxl/libxl_event.h
> @@ -184,6 +184,19 @@ void libxl_evdisable_domain_death(libxl_ctx *ctx, 
> libxl_evgen_domain_death*);
>     * may generate only a DEATH event.
>     */
>  
> +typedef struct libxl__aer_watch libxl_aer_watch;
> +int libxl_reg_aer_events_handler(libxl_ctx *, uint32_t, libxl_aer_watch **)
> +                        LIBXL_EXTERNAL_CALLERS_ONLY;
> +  /*
> +   * Registers a handler to handle the occurrence of unrecoverable AER 
> errors.
> +   * This function depends on the calling application running the libxl's
> +   * internal event loop. Toolstacks that do not use libxl's internal
> +   * event loop must arrange to have their own event loop created and enter
> +   * libxl (say, call libxl_event_wait()), to enable the event to be 
> processed.
> +   */
> +void libxl_unreg_aer_events_handler(libxl_ctx *, uint32_t, libxl_aer_watch *)
> +                        LIBXL_EXTERNAL_CALLERS_ONLY;
> +
>  typedef struct libxl__evgen_disk_eject libxl_evgen_disk_eject;
>  int libxl_evenable_disk_eject(libxl_ctx *ctx, uint32_t domid, const char 
> *vdev,
>                          libxl_ev_user, libxl_evgen_disk_eject **evgen_out);
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index afe6652..2b74286 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -352,6 +352,13 @@ struct libxl__ev_child {
>      LIBXL_LIST_ENTRY(struct libxl__ev_child) entry;
>  };
>  
> +/*
> + * Structure used for AER event handling.
> + */
> +struct libxl__aer_watch {
> +    uint32_t domid;
> +    libxl__ev_xswatch watch;
> +};
>  
>  /*
>   * evgen structures, which are the state we use for generating
> diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
> index 65ad5e5..feedf27 100644
> --- a/tools/libxl/libxl_pci.c
> +++ b/tools/libxl/libxl_pci.c
> @@ -1678,6 +1678,96 @@ static int libxl_device_pci_compare(libxl_device_pci 
> *d1,
>      return COMPARE_PCI(d1, d2);
>  }
>  
> +static void aer_backend_watch_callback(libxl__egc *egc,
> +                                       libxl__ev_xswatch *watch,
> +                                       const char *watch_path,
> +                                       const char *event_path)
> +{
> +    EGC_GC;
> +    libxl_aer_watch *aer_ws = CONTAINER_OF(watch, *aer_ws, watch);
> +    int rc;
> +    uint32_t dom, bus, dev, fn;
> +    uint32_t domid = aer_ws->domid;
> +    char *p, *path;
> +    const char *aerFailedSBDF;
> +    libxl_device_pci pcidev;
> +
> +    /* Extract the backend directory. */
> +    path = libxl__strdup(gc, event_path);
> +    p = strrchr(path, '/');
> +    if ((p == NULL) || (strcmp(p, "/aerFailedSBDF") != 0))
> +        return;
> +    /* Truncate the string so it points to the backend directory. */
> +    *p = '\0';
> +
> +    /* Fetch the value of the failed PCI device. */
> +    rc = libxl__xs_read_checked(gc, XBT_NULL,
> +            GCSPRINTF("%s/aerFailedSBDF", path), &aerFailedSBDF);
> +    if (rc || !aerFailedSBDF)
> +        return;
> +    sscanf(aerFailedSBDF, "%x:%x:%x.%x", &dom, &bus, &dev, &fn);
> +
> +    libxl_device_pci_init(&pcidev);
> +    pcidev_struct_fill(&pcidev, dom, bus, dev, fn, 0);
> +    /* Forcibly remove the device from the guest */
> +    rc = libxl__device_pci_remove_common(gc, domid, &pcidev, 1);
> +    if (rc)
> +        LOGD(ERROR, domid, " libxl__device_pci_remove_common() failed, 
> rc=x%x",
> +                (unsigned int)rc);
> +
> +    return;
> +}
> +
> +int libxl_reg_aer_events_handler(libxl_ctx *ctx,
> +                                 uint32_t domid,
> +                                 libxl_aer_watch **aer_ws_out)

Afaict libxl_aer_watch is an opaque type to external caller, so this
won't work, right?

> +{
> +    int rc = 0;
> +    int dom0_domid;

uint32_t pciback_domid;

> +    char *be_path;
> +    libxl_aer_watch *aer_ws = NULL;
> +    GC_INIT(ctx);
> +
> +    *aer_ws_out = NULL;
> +
> +    rc = libxl__get_domid(gc, (uint32_t *)(&dom0_domid));
> +    if (rc) {
> +        LOGD(ERROR, domid, " libxl__get_domid() failed, rc = %d", rc);
> +        goto out;
> +    }
> +
> +    aer_ws = malloc(sizeof(libxl_aer_watch));

libxl__calloc(NOGC, ...);

And then you can skip the check and memset.

> +    if (!aer_ws) {
> +        rc = ERROR_NOMEM;
> +        goto out;
> +    }
> +    memset(aer_ws, 0, sizeof(libxl_aer_watch));
> +
> +    aer_ws->domid = domid;
> +    be_path = GCSPRINTF("/local/domain/%d/backend/pci/%u/%d/%s",
> +            dom0_domid, domid, dom0_domid, "aerFailedSBDF");

Use %u here.

> +    rc = libxl__ev_xswatch_register(gc, &aer_ws->watch,
> +            aer_backend_watch_callback, be_path);
> +    *aer_ws_out = aer_ws;
> +
> +out:
> +    GC_FREE;
> +    return rc;
> +}
> +
> +void libxl_unreg_aer_events_handler(libxl_ctx *ctx,
> +                                    uint32_t domid,
> +                                    libxl_aer_watch *aer_ws)
> +{
> +    GC_INIT(ctx);
> +
> +    libxl__ev_xswatch_deregister(gc, &aer_ws->watch);
> +
> +    free(aer_ws);
> +    GC_FREE;
> +    return;
> +}

I think a bigger question is whether you agree with Ian's comments
regarding API design and whether you have more questions?

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.