[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [v8][PATCH 02/17] introduce XEN_DOMCTL_set_rdm



> From: Chen, Tiejun
> Sent: Monday, December 01, 2014 5:24 PM
> 
> This should be based on a new parameter globally, 'pci_rdmforce'.
> 
> pci_rdmforce = 1 => Of course this should be 0 by default.
> 
> '1' means we should force check to reserve all ranges. If failed
> VM wouldn't be created successfully. This also can give user a
> chance to work well with later hotplug, even if not a device
> assignment while creating VM.
> 
> But we can override that by one specific pci device:
> 
> pci = ['AA:BB.CC,rdmforce=0/1]
> 
> But this 'rdmforce' should be 1 by default since obviously any
> passthrough device always need to do this. Actually no one really
> want to set as '0' so it may be unnecessary but I'd like to leave
> this as a potential approach.

since no one requires it, why bother adding it? better to just
keep global option.

> 
> So this domctl provides an approach to control how to populate
> reserved device memory by tools.
> 
> Note we always post a message to user about this once we owns
> RMRR.
> 
> Signed-off-by: Tiejun Chen <tiejun.chen@xxxxxxxxx>
> ---
>  docs/man/xl.cfg.pod.5              |  6 +++++
>  docs/misc/vtd.txt                  | 15 ++++++++++++
>  tools/libxc/include/xenctrl.h      |  6 +++++
>  tools/libxc/xc_domain.c            | 28 +++++++++++++++++++++++
>  tools/libxl/libxl_create.c         |  3 +++
>  tools/libxl/libxl_dm.c             | 47
> ++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_internal.h       |  4 ++++
>  tools/libxl/libxl_types.idl        |  2 ++
>  tools/libxl/libxlu_pci.c           |  2 ++
>  tools/libxl/xl_cmdimpl.c           | 10 ++++++++
>  xen/drivers/passthrough/pci.c      | 39
> +++++++++++++++++++++++++++++++
>  xen/drivers/passthrough/vtd/dmar.c |  8 +++++++
>  xen/include/asm-x86/hvm/domain.h   |  4 ++++
>  xen/include/public/domctl.h        | 21 +++++++++++++++++
>  xen/xsm/flask/hooks.c              |  1 +
>  15 files changed, 196 insertions(+)
> 
> diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
> index 622ea53..9adc41e 100644
> --- a/docs/man/xl.cfg.pod.5
> +++ b/docs/man/xl.cfg.pod.5
> @@ -645,6 +645,12 @@ dom0 without confirmation.  Please use with care.
>  D0-D3hot power management states for the PCI device. False (0) by
>  default.
> 
> +=item B<rdmforce=BOOLEAN>
> +
> +(HVM/x86 only) Specifies that the VM would force to check and try to
> +reserve all reserved device memory, like RMRR, associated to the PCI
> +device. False (0) by default.
> +
>  =back
> 
>  =back
> diff --git a/docs/misc/vtd.txt b/docs/misc/vtd.txt
> index 9af0e99..23544d5 100644
> --- a/docs/misc/vtd.txt
> +++ b/docs/misc/vtd.txt
> @@ -111,6 +111,21 @@ in the config file:
>  To override for a specific device:
>       pci = [ '01:00.0,msitranslate=0', '03:00.0' ]
> 
> +RDM, 'reserved device memory', for PCI Device Passthrough
> +---------------------------------------------------------
> +
> +The BIOS controls some devices in terms of some reginos of memory used for
> +these devices. This kind of region should be reserved before creating a VM
> +to make sure they are not occupied by RAM/MMIO to conflict, and also we
> can
> +create necessary IOMMU table successfully.
> +
> +To enable this globally, add "pci_rdmforce" in the config file:
> +
> +     pci_rdmforce = 1         (default is 0)
> +
> +Or just enable for a specific device:
> +     pci = [ '01:00.0,rdmforce=1', '03:00.0' ]
> +
> 
>  Caveat on Conventional PCI Device Passthrough
>  ---------------------------------------------
> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
> index 0ad8b8d..84012fe 100644
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -2038,6 +2038,12 @@ int xc_assign_device(xc_interface *xch,
>                       uint32_t domid,
>                       uint32_t machine_bdf);
> 
> +int xc_domain_device_setrdm(xc_interface *xch,
> +                            uint32_t domid,
> +                            uint32_t num_pcidevs,
> +                            uint32_t pci_rdmforce,
> +                            struct xen_guest_pcidev_info *pcidevs);
> +
>  int xc_get_device_group(xc_interface *xch,
>                       uint32_t domid,
>                       uint32_t machine_bdf,
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index b864872..7fd43e9 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1633,6 +1633,34 @@ int xc_assign_device(
>      return do_domctl(xch, &domctl);
>  }
> 
> +int xc_domain_device_setrdm(xc_interface *xch,
> +                            uint32_t domid,
> +                            uint32_t num_pcidevs,
> +                            uint32_t pci_rdmforce,
> +                            struct xen_guest_pcidev_info *pcidevs)
> +{
> +    int ret;
> +    DECLARE_DOMCTL;
> +    DECLARE_HYPERCALL_BOUNCE(pcidevs,
> +
> num_pcidevs*sizeof(xen_guest_pcidev_info_t),
> +                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
> +
> +    if ( xc_hypercall_bounce_pre(xch, pcidevs) )
> +        return -1;
> +
> +    domctl.cmd = XEN_DOMCTL_set_rdm;
> +    domctl.domain = (domid_t)domid;
> +    domctl.u.set_rdm.flags = pci_rdmforce;
> +    domctl.u.set_rdm.num_pcidevs = num_pcidevs;
> +    set_xen_guest_handle(domctl.u.set_rdm.pcidevs, pcidevs);
> +
> +    ret = do_domctl(xch, &domctl);
> +
> +    xc_hypercall_bounce_post(xch, pcidevs);
> +
> +    return ret;
> +}
> +
>  int xc_get_device_group(
>      xc_interface *xch,
>      uint32_t domid,
> diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
> index 1198225..c615686 100644
> --- a/tools/libxl/libxl_create.c
> +++ b/tools/libxl/libxl_create.c
> @@ -862,6 +862,9 @@ static void initiate_domain_create(libxl__egc *egc,
>      ret = libxl__domain_build_info_setdefault(gc, &d_config->b_info);
>      if (ret) goto error_out;
> 
> +    ret = libxl__domain_device_setrdm(gc, d_config, domid);
> +    if (ret) goto error_out;
> +
>      if (!sched_params_valid(gc, domid, &d_config->b_info.sched_params)) {
>          LOG(ERROR, "Invalid scheduling parameters\n");
>          ret = ERROR_INVAL;
> diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
> index 3e191c3..e50587d 100644
> --- a/tools/libxl/libxl_dm.c
> +++ b/tools/libxl/libxl_dm.c
> @@ -90,6 +90,53 @@ const char *libxl__domain_device_model(libxl__gc *gc,
>      return dm;
>  }
> 
> +int libxl__domain_device_setrdm(libxl__gc *gc,
> +                                libxl_domain_config *d_config,
> +                                uint32_t dm_domid)
> +{
> +    int i, ret;
> +    libxl_ctx *ctx = libxl__gc_owner(gc);
> +    struct xen_guest_pcidev_info *pcidevs = NULL;
> +    uint32_t rdmforce = 0;
> +
> +    if ( d_config->num_pcidevs )
> +    {
> +        pcidevs =
> malloc(d_config->num_pcidevs*sizeof(xen_guest_pcidev_info_t));
> +        if ( pcidevs )
> +        {
> +            for (i = 0; i < d_config->num_pcidevs; i++)
> +            {
> +                pcidevs[i].devfn = PCI_DEVFN(d_config->pcidevs[i].dev,
> +
> d_config->pcidevs[i].func);
> +                pcidevs[i].bus = d_config->pcidevs[i].bus;
> +                pcidevs[i].seg = d_config->pcidevs[i].domain;
> +                pcidevs[i].flags = d_config->pcidevs[i].rdmforce &
> +                                   PCI_DEV_RDM_CHECK;
> +            }
> +        }
> +        else
> +        {
> +            LIBXL__LOG(CTX, LIBXL__LOG_ERROR,
> +                               "Can't allocate for pcidevs.");
> +            return -1;
> +        }
> +    }
> +    rdmforce = libxl_defbool_val(d_config->b_info.rdmforce) ? 1 : 0;
> +
> +    /* Nothing to do. */
> +    if ( !rdmforce && !d_config->num_pcidevs )
> +        return 0;

move check before creating pcidevs.

> +
> +    ret = xc_domain_device_setrdm(ctx->xch, dm_domid,
> +                                  (uint32_t)d_config->num_pcidevs,
> +                                  rdmforce,
> +                                  pcidevs);
> +    if ( d_config->num_pcidevs )
> +        free(pcidevs);
> +
> +    return ret;
> +}
> +
>  const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config
> *guest_config)
>  {
>      const libxl_vnc_info *vnc = NULL;
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index a38f695..be397a6 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -1477,6 +1477,10 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc
> *gc,
>          int nr_disks, libxl_device_disk *disks,
>          int nr_channels, libxl_device_channel *channels);
> 
> +_hidden int libxl__domain_device_setrdm(libxl__gc *gc,
> +                                        libxl_domain_config *info,
> +                                        uint32_t domid);
> +
>  /*
>   * This function will cause the whole libxl process to hang
>   * if the device model does not respond.  It is deprecated.
> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
> index f7fc695..0076a32 100644
> --- a/tools/libxl/libxl_types.idl
> +++ b/tools/libxl/libxl_types.idl
> @@ -398,6 +398,7 @@ libxl_domain_build_info =
> Struct("domain_build_info",[
>      ("kernel",           string),
>      ("cmdline",          string),
>      ("ramdisk",          string),
> +    ("rdmforce",         libxl_defbool),
>      ("u", KeyedUnion(None, libxl_domain_type, "type",
>                  [("hvm", Struct(None, [("firmware",         string),
>                                         ("bios",
> libxl_bios_type),
> @@ -518,6 +519,7 @@ libxl_device_pci = Struct("device_pci", [
>      ("power_mgmt", bool),
>      ("permissive", bool),
>      ("seize", bool),
> +    ("rdmforce", bool),
>      ])
> 
>  libxl_device_vtpm = Struct("device_vtpm", [
> diff --git a/tools/libxl/libxlu_pci.c b/tools/libxl/libxlu_pci.c
> index 26fb143..989eac8 100644
> --- a/tools/libxl/libxlu_pci.c
> +++ b/tools/libxl/libxlu_pci.c
> @@ -143,6 +143,8 @@ int xlu_pci_parse_bdf(XLU_Config *cfg,
> libxl_device_pci *pcidev, const char *str
>                      pcidev->permissive = atoi(tok);
>                  }else if ( !strcmp(optkey, "seize") ) {
>                      pcidev->seize = atoi(tok);
> +                }else if ( !strcmp(optkey, "rdmforce") ) {
> +                    pcidev->rdmforce = atoi(tok);
>                  }else{
>                      XLU__PCI_ERR(cfg, "Unknown PCI BDF option: %s",
> optkey);
>                  }
> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> index 0e754e7..9c23733 100644
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c
> @@ -919,6 +919,7 @@ static void parse_config_data(const char
> *config_source,
>      int pci_msitranslate = 0;
>      int pci_permissive = 0;
>      int pci_seize = 0;
> +    int pci_rdmforce = 0;
>      int i, e;
> 
>      libxl_domain_create_info *c_info = &d_config->c_info;
> @@ -1699,6 +1700,9 @@ skip_vfb:
>      if (!xlu_cfg_get_long (config, "pci_seize", &l, 0))
>          pci_seize = l;
> 
> +    if (!xlu_cfg_get_long (config, "pci_rdmforce", &l, 0))
> +        pci_rdmforce = l;
> +
>      /* To be reworked (automatically enabled) once the auto ballooning
>       * after guest starts is done (with PCI devices passed in). */
>      if (c_info->type == LIBXL_DOMAIN_TYPE_PV) {
> @@ -1719,6 +1723,7 @@ skip_vfb:
>              pcidev->power_mgmt = pci_power_mgmt;
>              pcidev->permissive = pci_permissive;
>              pcidev->seize = pci_seize;
> +            pcidev->rdmforce = pci_rdmforce;
>              if (!xlu_pci_parse_bdf(config, pcidev, buf))
>                  d_config->num_pcidevs++;
>          }
> @@ -1726,6 +1731,11 @@ skip_vfb:
>              libxl_defbool_set(&b_info->u.pv.e820_host, true);
>      }
> 
> +    if ((c_info->type == LIBXL_DOMAIN_TYPE_HVM) && pci_rdmforce)
> +        libxl_defbool_set(&b_info->rdmforce, true);
> +    else
> +        libxl_defbool_set(&b_info->rdmforce, false);
> +
>      switch (xlu_cfg_get_list(config, "cpuid", &cpuids, 0, 1)) {
>      case 0:
>          {
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index 78c6977..ae924ad 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -34,6 +34,7 @@
>  #include <xen/tasklet.h>
>  #include <xsm/xsm.h>
>  #include <asm/msi.h>
> +#include <xen/stdbool.h>
> 
>  struct pci_seg {
>      struct list_head alldevs_list;
> @@ -1553,6 +1554,44 @@ int iommu_do_pci_domctl(
>          }
>          break;
> 
> +    case XEN_DOMCTL_set_rdm:
> +    {
> +        struct xen_domctl_set_rdm *xdsr = &domctl->u.set_rdm;
> +        struct xen_guest_pcidev_info *pcidevs = NULL;
> +        struct domain *d = rcu_lock_domain_by_any_id(domctl->domain);
> +
> +        if ( d == NULL )
> +            return -ESRCH;
> +
> +        d->arch.hvm_domain.pci_force =
> +                            xdsr->flags & PCI_DEV_RDM_CHECK ?
> true : false;
> +        d->arch.hvm_domain.num_pcidevs = xdsr->num_pcidevs;
> +        d->arch.hvm_domain.pcidevs = NULL;
> +
> +        if ( xdsr->num_pcidevs )
> +        {
> +            pcidevs = xmalloc_array(xen_guest_pcidev_info_t,
> +                                    xdsr->num_pcidevs);
> +            if ( pcidevs == NULL )
> +            {
> +                rcu_unlock_domain(d);
> +                return -ENOMEM;
> +            }
> +
> +            if ( copy_from_guest(pcidevs, xdsr->pcidevs,
> +
> xdsr->num_pcidevs*sizeof(*pcidevs)) )
> +            {
> +                xfree(pcidevs);
> +                rcu_unlock_domain(d);
> +                return -EFAULT;
> +            }
> +        }
> +
> +        d->arch.hvm_domain.pcidevs = pcidevs;
> +        rcu_unlock_domain(d);
> +    }
> +        break;
> +
>      case XEN_DOMCTL_assign_device:
>          if ( unlikely(d->is_dying) )
>          {
> diff --git a/xen/drivers/passthrough/vtd/dmar.c
> b/xen/drivers/passthrough/vtd/dmar.c
> index 1152c3a..5e41e7a 100644
> --- a/xen/drivers/passthrough/vtd/dmar.c
> +++ b/xen/drivers/passthrough/vtd/dmar.c
> @@ -674,6 +674,14 @@ acpi_parse_one_rmrr(struct acpi_dmar_header
> *header)
>                          "  RMRR region: base_addr %"PRIx64
>                          " end_address %"PRIx64"\n",
>                          rmrru->base_address, rmrru->end_address);
> +            /*
> +             * TODO: we may provide a precise paramter just to reserve
> +             * RMRR range specific to one device.
> +             */
> +            dprintk(XENLOG_WARNING VTDPREFIX,
> +                    "So please set pci_rdmforce to reserve these ranges"
> +                    " if you need such a device in hotplug case.\n");
> +
>              acpi_register_rmrr_unit(rmrru);
>          }
>      }
> diff --git a/xen/include/asm-x86/hvm/domain.h
> b/xen/include/asm-x86/hvm/domain.h
> index 2757c7f..38530e5 100644
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -90,6 +90,10 @@ struct hvm_domain {
>      /* Cached CF8 for guest PCI config cycles */
>      uint32_t                pci_cf8;
> 
> +    bool_t                  pci_force;
> +    uint32_t                num_pcidevs;
> +    struct xen_guest_pcidev_info      *pcidevs;
> +
>      struct pl_time         pl_time;
> 
>      struct hvm_io_handler *io_handler;
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 57e2ed7..ba8970d 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -508,6 +508,25 @@ struct xen_domctl_get_device_group {
>  typedef struct xen_domctl_get_device_group
> xen_domctl_get_device_group_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
> 
> +/* Currently just one bit to indicate force to check Reserved Device Memory.
> */
> +#define PCI_DEV_RDM_CHECK   0x1
> +struct xen_guest_pcidev_info {
> +    uint16_t    seg;
> +    uint8_t     bus;
> +    uint8_t     devfn;
> +    uint32_t    flags;
> +};
> +typedef struct xen_guest_pcidev_info xen_guest_pcidev_info_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_guest_pcidev_info_t);
> +/* Control whether/how we check and reserve device memory. */
> +struct xen_domctl_set_rdm {
> +    uint32_t    flags;
> +    uint32_t    num_pcidevs;
> +    XEN_GUEST_HANDLE_64(xen_guest_pcidev_info_t) pcidevs;
> +};
> +typedef struct xen_domctl_set_rdm xen_domctl_set_rdm_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_rdm_t);
> +
>  /* Pass-through interrupts: bind real irq -> hvm devfn. */
>  /* XEN_DOMCTL_bind_pt_irq */
>  /* XEN_DOMCTL_unbind_pt_irq */
> @@ -1070,6 +1089,7 @@ struct xen_domctl {
>  #define XEN_DOMCTL_setvnumainfo                  74
>  #define XEN_DOMCTL_psr_cmt_op                    75
>  #define XEN_DOMCTL_arm_configure_domain          76
> +#define XEN_DOMCTL_set_rdm                       77
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -1135,6 +1155,7 @@ struct xen_domctl {
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
>          struct xen_domctl_vnuma             vnuma;
>          struct xen_domctl_psr_cmt_op        psr_cmt_op;
> +        struct xen_domctl_set_rdm           set_rdm;
>          uint8_t                             pad[128];
>      } u;
>  };
> diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
> index d48463f..5a760e2 100644
> --- a/xen/xsm/flask/hooks.c
> +++ b/xen/xsm/flask/hooks.c
> @@ -592,6 +592,7 @@ static int flask_domctl(struct domain *d, int cmd)
>      case XEN_DOMCTL_test_assign_device:
>      case XEN_DOMCTL_assign_device:
>      case XEN_DOMCTL_deassign_device:
> +    case XEN_DOMCTL_set_rdm:
>  #endif
>          return 0;
> 
> --
> 1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.