|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v3 1/2] libxl: Implement the handler to handle unrecoverable AER errors
On Mon, Aug 07, 2017 at 06:54:56PM -0500, Venu Busireddy wrote:
> Implement the callback function to handle unrecoverable AER errors, and
> also the public APIs that can be used to register/unregister the handler.
> When an AER error occurs, the handler will forcibly remove the erring
> PCIe device from the guest.
>
> Signed-off-by: Venu Busireddy <venu.busireddy@xxxxxxxxxx>
> ---
> tools/libxl/libxl.h | 14 +++++++
> tools/libxl/libxl_event.h | 13 +++++++
> tools/libxl/libxl_internal.h | 7 ++++
> tools/libxl/libxl_pci.c | 90
> ++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 124 insertions(+)
>
> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
> index 7cf0f31..c5af0aa 100644
> --- a/tools/libxl/libxl.h
> +++ b/tools/libxl/libxl.h
> @@ -1044,6 +1044,20 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst,
> const libxl_mac *src);
> */
> #define LIBXL_HAVE_QED 1
>
> +/* LIBXL_HAVE_REG_AER_EVENTS_HANDLER
> + *
> + * If it is defined, libxl has a library function called
> + * libxl_reg_aer_events_handler.
> + */
> +#define LIBXL_HAVE_REG_AER_EVENTS_HANDLER 1
> +
> +/* LIBXL_HAVE_UNREG_AER_EVENTS_HANDLER
> + *
> + * If it is defined, libxl has a library function called
> + * libxl_unreg_aer_events_handler.
> + */
> +#define LIBXL_HAVE_UNREG_AER_EVENTS_HANDLER 1
> +
You can consolidate both into
LIBLX_HAVE_AER_EVENTS_HANDLER
> typedef char **libxl_string_list;
> void libxl_string_list_dispose(libxl_string_list *sl);
> int libxl_string_list_length(const libxl_string_list *sl);
> diff --git a/tools/libxl/libxl_event.h b/tools/libxl/libxl_event.h
> index 1ea789e..1aea906 100644
> --- a/tools/libxl/libxl_event.h
> +++ b/tools/libxl/libxl_event.h
> @@ -184,6 +184,19 @@ void libxl_evdisable_domain_death(libxl_ctx *ctx,
> libxl_evgen_domain_death*);
> * may generate only a DEATH event.
> */
>
> +typedef struct libxl__aer_watch libxl_aer_watch;
> +int libxl_reg_aer_events_handler(libxl_ctx *, uint32_t, libxl_aer_watch **)
> + LIBXL_EXTERNAL_CALLERS_ONLY;
> + /*
> + * Registers a handler to handle the occurrence of unrecoverable AER
> errors.
> + * This function depends on the calling application running the libxl's
> + * internal event loop. Toolstacks that do not use libxl's internal
> + * event loop must arrange to have their own event loop created and enter
> + * libxl (say, call libxl_event_wait()), to enable the event to be
> processed.
> + */
> +void libxl_unreg_aer_events_handler(libxl_ctx *, uint32_t, libxl_aer_watch *)
> + LIBXL_EXTERNAL_CALLERS_ONLY;
> +
> typedef struct libxl__evgen_disk_eject libxl_evgen_disk_eject;
> int libxl_evenable_disk_eject(libxl_ctx *ctx, uint32_t domid, const char
> *vdev,
> libxl_ev_user, libxl_evgen_disk_eject **evgen_out);
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index afe6652..2b74286 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -352,6 +352,13 @@ struct libxl__ev_child {
> LIBXL_LIST_ENTRY(struct libxl__ev_child) entry;
> };
>
> +/*
> + * Structure used for AER event handling.
> + */
> +struct libxl__aer_watch {
> + uint32_t domid;
> + libxl__ev_xswatch watch;
> +};
>
> /*
> * evgen structures, which are the state we use for generating
> diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
> index 65ad5e5..feedf27 100644
> --- a/tools/libxl/libxl_pci.c
> +++ b/tools/libxl/libxl_pci.c
> @@ -1678,6 +1678,96 @@ static int libxl_device_pci_compare(libxl_device_pci
> *d1,
> return COMPARE_PCI(d1, d2);
> }
>
> +static void aer_backend_watch_callback(libxl__egc *egc,
> + libxl__ev_xswatch *watch,
> + const char *watch_path,
> + const char *event_path)
> +{
> + EGC_GC;
> + libxl_aer_watch *aer_ws = CONTAINER_OF(watch, *aer_ws, watch);
> + int rc;
> + uint32_t dom, bus, dev, fn;
> + uint32_t domid = aer_ws->domid;
> + char *p, *path;
> + const char *aerFailedSBDF;
> + libxl_device_pci pcidev;
> +
> + /* Extract the backend directory. */
> + path = libxl__strdup(gc, event_path);
> + p = strrchr(path, '/');
> + if ((p == NULL) || (strcmp(p, "/aerFailedSBDF") != 0))
> + return;
> + /* Truncate the string so it points to the backend directory. */
> + *p = '\0';
> +
> + /* Fetch the value of the failed PCI device. */
> + rc = libxl__xs_read_checked(gc, XBT_NULL,
> + GCSPRINTF("%s/aerFailedSBDF", path), &aerFailedSBDF);
> + if (rc || !aerFailedSBDF)
> + return;
> + sscanf(aerFailedSBDF, "%x:%x:%x.%x", &dom, &bus, &dev, &fn);
> +
> + libxl_device_pci_init(&pcidev);
> + pcidev_struct_fill(&pcidev, dom, bus, dev, fn, 0);
> + /* Forcibly remove the device from the guest */
> + rc = libxl__device_pci_remove_common(gc, domid, &pcidev, 1);
> + if (rc)
> + LOGD(ERROR, domid, " libxl__device_pci_remove_common() failed,
> rc=x%x",
> + (unsigned int)rc);
> +
> + return;
> +}
> +
> +int libxl_reg_aer_events_handler(libxl_ctx *ctx,
> + uint32_t domid,
> + libxl_aer_watch **aer_ws_out)
Afaict libxl_aer_watch is an opaque type to external caller, so this
won't work, right?
> +{
> + int rc = 0;
> + int dom0_domid;
uint32_t pciback_domid;
> + char *be_path;
> + libxl_aer_watch *aer_ws = NULL;
> + GC_INIT(ctx);
> +
> + *aer_ws_out = NULL;
> +
> + rc = libxl__get_domid(gc, (uint32_t *)(&dom0_domid));
> + if (rc) {
> + LOGD(ERROR, domid, " libxl__get_domid() failed, rc = %d", rc);
> + goto out;
> + }
> +
> + aer_ws = malloc(sizeof(libxl_aer_watch));
libxl__calloc(NOGC, ...);
And then you can skip the check and memset.
> + if (!aer_ws) {
> + rc = ERROR_NOMEM;
> + goto out;
> + }
> + memset(aer_ws, 0, sizeof(libxl_aer_watch));
> +
> + aer_ws->domid = domid;
> + be_path = GCSPRINTF("/local/domain/%d/backend/pci/%u/%d/%s",
> + dom0_domid, domid, dom0_domid, "aerFailedSBDF");
Use %u here.
> + rc = libxl__ev_xswatch_register(gc, &aer_ws->watch,
> + aer_backend_watch_callback, be_path);
> + *aer_ws_out = aer_ws;
> +
> +out:
> + GC_FREE;
> + return rc;
> +}
> +
> +void libxl_unreg_aer_events_handler(libxl_ctx *ctx,
> + uint32_t domid,
> + libxl_aer_watch *aer_ws)
> +{
> + GC_INIT(ctx);
> +
> + libxl__ev_xswatch_deregister(gc, &aer_ws->watch);
> +
> + free(aer_ws);
> + GC_FREE;
> + return;
> +}
I think a bigger question is whether you agree with Ian's comments
regarding API design and whether you have more questions?
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |