[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging-4.9] xl.conf: Add global affinity masks



commit 14f90aaef8d441cbdece5b74829e85e767fb196c
Author:     Wei Liu <wei.liu2@xxxxxxxxxx>
AuthorDate: Tue Aug 7 15:35:34 2018 +0100
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Tue Aug 14 17:20:02 2018 +0100

    xl.conf: Add global affinity masks
    
    XSA-273 involves one hyperthread being able to use Spectre-like
    techniques to "spy" on another thread.  The details are somewhat
    complicated, but the upshot is that after all Xen-based mitigations
    have been applied:
    
    * PV guests cannot spy on sibling threads
    * HVM guests can spy on sibling threads
    
    (NB that for purposes of this vulnerability, PVH and HVM guests are
    identical.  Whenever this comment refers to 'HVM', this includes PVH.)
    
    There are many possible mitigations to this, including disabling
    hyperthreading entirely.  But another solution would be:
    
    * Specify some cores as PV-only, others as PV or HVM
    * Allow HVM guests to only run on thread 0 of the "HVM-or-PV" cores
    * Allow PV guests to run on the above cores, as well as any thread of the 
PV-only cores.
    
    For example, suppose you had 16 threads across 8 cores (0-7).  You
    could specify 0-3 as PV-only, and 4-7 as HVM-or-PV.  Then you'd set
    the affinity of the HVM guests as follows (binary representation):
    
    0000000010101010
    
    And the affinity of the PV guests as follows:
    
    1111111110101010
    
    In order to make this easy, this patches introduces three "global affinity
    masks", placed in xl.conf:
    
        vm.cpumask
        vm.hvm.cpumask
        vm.pv.cpumask
    
    These are parsed just like the 'cpus' and 'cpus_soft' options in the
    per-domain xl configuration files.  The resulting mask is AND-ed with
    whatever mask results at the end of the xl configuration file.
    `vm.cpumask` would be applied to all guest types, `vm.hvm.cpumask`
    would be applied to HVM and PVH guest types, and `vm.pv.cpumask`
    would be applied to PV guest types.
    
    The idea would be that to implement the above mask across all your
    VMs, you'd simply add the following two lines to the configuration
    file:
    
        vm.hvm.cpumask=8,10,12,14
        vm.pv.cpumask=0-8,10,12,14
    
    See xl.conf manpage for details.
    
    This is part of XSA-273 / CVE-2018-3646.
    
    Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxx>
    Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
    (cherry picked from commit aa67b97ed34279c43a43d9ca46727b5746caa92e)
    
    PVH guest type in toolstack is not available in this version of Xen.
    Change code and manpage to cope.
    
    Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 docs/man/xl.conf.pod.5  | 22 ++++++++++++++
 tools/examples/xl.conf  |  5 ++++
 tools/xl/xl.c           | 26 ++++++++++++++++
 tools/xl/xl.h           |  7 +++++
 tools/xl/xl_cmdtable.c  |  6 ++--
 tools/xl/xl_vcpu.c      | 79 +++++++++++++++++++++++++++++++++++++++++++++++--
 tools/xl/xl_vmcontrol.c | 39 ++++++++++++++++++++++--
 7 files changed, 178 insertions(+), 6 deletions(-)

diff --git a/docs/man/xl.conf.pod.5 b/docs/man/xl.conf.pod.5
index 8f7fd28882..4b62252789 100644
--- a/docs/man/xl.conf.pod.5
+++ b/docs/man/xl.conf.pod.5
@@ -176,6 +176,28 @@ massively huge guests).
 
 =back
 
+=item B<vm.cpumask>="CPULIST"
+
+=item B<vm.hvm.cpumask>="CPULIST"
+
+=item B<vm.pv.cpumask>="CPULIST"
+
+Global masks that are applied when creating guests and pinning vcpus
+to indicate which cpus they are allowed to run on.  Specifically,
+C<vm.cpumask> applies to all guest types, C<vm.hvm.cpumask> applies to
+HVM guests and C<vm.pv.cpumask> applies to PV guests.
+
+The hard affinity of guest's vcpus are logical-AND'ed with respective
+masks. If the resulting affinity mask is empty, operation will fail.
+
+Use --ignore-global-affinity-masks to skip applying global masks.
+
+The default value for these masks are all 1's, i.e. all cpus are allowed.
+
+Due to bug(s), these options may not interact well with other options
+concerning CPU affinity. One example is CPU pools. Users should always double
+check that the required affinity has taken effect.
+
 =back
 
 =head1 SEE ALSO
diff --git a/tools/examples/xl.conf b/tools/examples/xl.conf
index 374b6bbc2e..0446deb304 100644
--- a/tools/examples/xl.conf
+++ b/tools/examples/xl.conf
@@ -37,3 +37,8 @@
 # (which can take a long time to find out if launching huge guests).
 # see xl.conf(5) for details.
 #claim_mode=1
+
+# Specify global vcpu hard affinity masks. See xl.conf(5) for details.
+#vm.cpumask="0-7"
+#vm.pv.cpumask="0-3"
+#vm.hvm.cpumask="3-7"
diff --git a/tools/xl/xl.c b/tools/xl/xl.c
index 02179a6229..92d82e4482 100644
--- a/tools/xl/xl.c
+++ b/tools/xl/xl.c
@@ -28,6 +28,9 @@
 #include <libxl_utils.h>
 #include <libxlutil.h>
 #include "xl.h"
+#include "xl_parse.h"
+
+#include "xl_utils.h"
 
 xentoollog_logger_stdiostream *logger;
 int dryrun_only;
@@ -42,6 +45,9 @@ char *default_gatewaydev = NULL;
 char *default_vifbackend = NULL;
 char *default_remus_netbufscript = NULL;
 char *default_colo_proxy_script = NULL;
+libxl_bitmap global_vm_affinity_mask;
+libxl_bitmap global_hvm_affinity_mask;
+libxl_bitmap global_pv_affinity_mask;
 enum output_format default_output_format = OUTPUT_FORMAT_JSON;
 int claim_mode = 1;
 bool progress_use_cr = 0;
@@ -188,6 +194,26 @@ static void parse_global_config(const char *configfile,
     xlu_cfg_replace_string (config, "colo.default.proxyscript",
         &default_colo_proxy_script, 0);
 
+    libxl_bitmap_init(&global_vm_affinity_mask);
+    libxl_cpu_bitmap_alloc(ctx, &global_vm_affinity_mask, 0);
+    libxl_bitmap_init(&global_hvm_affinity_mask);
+    libxl_cpu_bitmap_alloc(ctx, &global_hvm_affinity_mask, 0);
+    libxl_bitmap_init(&global_pv_affinity_mask);
+    libxl_cpu_bitmap_alloc(ctx, &global_pv_affinity_mask, 0);
+
+    if (!xlu_cfg_get_string (config, "vm.cpumask", &buf, 0))
+        parse_cpurange(buf, &global_vm_affinity_mask);
+    else
+        libxl_bitmap_set_any(&global_vm_affinity_mask);
+    if (!xlu_cfg_get_string (config, "vm.hvm.cpumask", &buf, 0))
+        parse_cpurange(buf, &global_hvm_affinity_mask);
+    else
+       libxl_bitmap_set_any(&global_hvm_affinity_mask);
+    if (!xlu_cfg_get_string (config, "vm.pv.cpumask", &buf, 0))
+        parse_cpurange(buf, &global_pv_affinity_mask);
+    else
+        libxl_bitmap_set_any(&global_pv_affinity_mask);
+
     xlu_cfg_destroy(config);
 }
 
diff --git a/tools/xl/xl.h b/tools/xl/xl.h
index aa95b77146..3de6feaed9 100644
--- a/tools/xl/xl.h
+++ b/tools/xl/xl.h
@@ -41,6 +41,7 @@ struct domain_create {
     int vncautopass;
     int console_autoconnect;
     int checkpointed_stream;
+    int ignore_global_affinity_masks;
     const char *config_file;
     char *extra_config; /* extra config string */
     const char *restore_file;
@@ -273,6 +274,9 @@ extern char *default_vifbackend;
 extern char *default_remus_netbufscript;
 extern char *default_colo_proxy_script;
 extern char *blkdev_start;
+extern libxl_bitmap global_vm_affinity_mask;
+extern libxl_bitmap global_hvm_affinity_mask;
+extern libxl_bitmap global_pv_affinity_mask;
 
 enum output_format {
     OUTPUT_FORMAT_JSON,
@@ -288,6 +292,9 @@ typedef enum {
 } domain_restart_type;
 
 extern void printf_info_sexp(int domid, libxl_domain_config *d_config, FILE 
*fh);
+extern void apply_global_affinity_masks(libxl_domain_type type,
+                                        libxl_bitmap *vcpu_affinity_array,
+                                        unsigned int size);
 
 #define XL_GLOBAL_CONFIG XEN_CONFIG_DIR "/xl.conf"
 #define XL_LOCK_FILE XEN_LOCK_DIR "/xl"
diff --git a/tools/xl/xl_cmdtable.c b/tools/xl/xl_cmdtable.c
index 30eb93c17f..7ea95bce03 100644
--- a/tools/xl/xl_cmdtable.c
+++ b/tools/xl/xl_cmdtable.c
@@ -34,7 +34,8 @@ struct cmd_spec cmd_table[] = {
       "-e                      Do not wait in the background for the death of 
the domain.\n"
       "-V, --vncviewer         Connect to the VNC display after the domain is 
created.\n"
       "-A, --vncviewer-autopass\n"
-      "                        Pass VNC password to viewer via stdin."
+      "                        Pass VNC password to viewer via stdin.\n"
+      "--ignore-global-affinity-masks Ignore global masks in xl.conf."
     },
     { "config-update",
       &main_config_update, 1, 1,
@@ -224,7 +225,8 @@ struct cmd_spec cmd_table[] = {
       &main_vcpupin, 1, 1,
       "Set which CPUs a VCPU can use",
       "[option] <Domain> <VCPU|all> <Hard affinity|-|all> <Soft 
affinity|-|all>",
-      "-f, --force        undo an override pinning done by the kernel",
+      "-f, --force        undo an override pinning done by the kernel\n"
+      "--ignore-global-affinity-masks Ignore global masks in xl.conf",
     },
     { "vcpu-set",
       &main_vcpuset, 0, 1,
diff --git a/tools/xl/xl_vcpu.c b/tools/xl/xl_vcpu.c
index 8e735b38c1..1da2d8313a 100644
--- a/tools/xl/xl_vcpu.c
+++ b/tools/xl/xl_vcpu.c
@@ -68,6 +68,60 @@ static void print_domain_vcpuinfo(uint32_t domid, uint32_t 
nr_cpus)
     libxl_vcpuinfo_list_free(vcpuinfo, nb_vcpu);
 }
 
+void apply_global_affinity_masks(libxl_domain_type type,
+                                 libxl_bitmap *vcpu_affinity_array,
+                                 unsigned int size)
+{
+    libxl_bitmap *mask = &global_vm_affinity_mask;
+    libxl_bitmap *type_mask;
+    unsigned int i;
+
+    switch (type) {
+    case LIBXL_DOMAIN_TYPE_HVM:
+        type_mask = &global_hvm_affinity_mask;
+        break;
+    case LIBXL_DOMAIN_TYPE_PV:
+        type_mask = &global_pv_affinity_mask;
+        break;
+    default:
+        fprintf(stderr, "Unknown guest type\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (i = 0; i < size; i++) {
+        int rc;
+        libxl_bitmap *t = &vcpu_affinity_array[i];
+        libxl_bitmap b1, b2;
+
+        libxl_bitmap_init(&b1);
+        libxl_bitmap_init(&b2);
+
+        rc = libxl_bitmap_and(ctx, &b1, t, mask);
+        if (rc) {
+            fprintf(stderr, "libxl_bitmap_and errored\n");
+            exit(EXIT_FAILURE);
+        }
+        rc = libxl_bitmap_and(ctx, &b2, &b1, type_mask);
+        if (rc) {
+            fprintf(stderr, "libxl_bitmap_and errored\n");
+            exit(EXIT_FAILURE);
+        }
+
+        if (libxl_bitmap_is_empty(&b2)) {
+            fprintf(stderr, "vcpu hard affinity map is empty\n");
+            exit(EXIT_FAILURE);
+        }
+
+        /* Replace target bitmap with the result */
+        libxl_bitmap_dispose(t);
+        libxl_bitmap_init(t);
+        libxl_bitmap_copy_alloc(ctx, t, &b2);
+
+        libxl_bitmap_dispose(&b1);
+        libxl_bitmap_dispose(&b2);
+    }
+}
+
 static void vcpulist(int argc, char **argv)
 {
     libxl_dominfo *dominfo;
@@ -118,6 +172,7 @@ int main_vcpupin(int argc, char **argv)
 {
     static struct option opts[] = {
         {"force", 0, 0, 'f'},
+        {"ignore-global-affinity-masks", 0, 0, 'i'},
         COMMON_LONG_OPTS
     };
     libxl_vcpuinfo *vcpuinfo;
@@ -132,15 +187,18 @@ int main_vcpupin(int argc, char **argv)
     const char *vcpu, *hard_str, *soft_str;
     char *endptr;
     int opt, nb_cpu, nb_vcpu, rc = EXIT_FAILURE;
-    bool force = false;
+    bool force = false, ignore_masks = false;
 
     libxl_bitmap_init(&cpumap_hard);
     libxl_bitmap_init(&cpumap_soft);
 
-    SWITCH_FOREACH_OPT(opt, "f", opts, "vcpu-pin", 3) {
+    SWITCH_FOREACH_OPT(opt, "fi", opts, "vcpu-pin", 3) {
     case 'f':
         force = true;
         break;
+    case 'i':
+        ignore_masks = true;
+        break;
     default:
         break;
     }
@@ -222,6 +280,23 @@ int main_vcpupin(int argc, char **argv)
         goto out;
     }
 
+    /* Only hard affinity matters here */
+    if (!ignore_masks) {
+        libxl_domain_config d_config;
+
+        libxl_domain_config_init(&d_config);
+        rc = libxl_retrieve_domain_configuration(ctx, domid, &d_config);
+        if (rc) {
+            fprintf(stderr, "Could not retrieve domain configuration\n");
+            libxl_domain_config_dispose(&d_config);
+            goto out;
+        }
+
+        apply_global_affinity_masks(d_config.b_info.type, hard, 1);
+
+        libxl_domain_config_dispose(&d_config);
+    }
+
     if (force) {
         if (libxl_set_vcpuaffinity_force(ctx, domid, vcpuid, hard, soft)) {
             fprintf(stderr, "Could not set affinity for vcpu `%ld'.\n",
diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c
index 89c2b25ded..a1d633795c 100644
--- a/tools/xl/xl_vmcontrol.c
+++ b/tools/xl/xl_vmcontrol.c
@@ -804,6 +804,36 @@ int create_domain(struct domain_create *dom_info)
         parse_config_data(config_source, config_data, config_len, &d_config);
     }
 
+    if (!dom_info->ignore_global_affinity_masks) {
+        libxl_domain_build_info *b_info = &d_config.b_info;
+
+        /* It is possible that no hard affinity is specified in config file.
+         * Generate hard affinity maps now if we care about those.
+         */
+        if (b_info->num_vcpu_hard_affinity == 0 &&
+              (!libxl_bitmap_is_full(&global_vm_affinity_mask) ||
+                 (b_info->type == LIBXL_DOMAIN_TYPE_PV &&
+                  !libxl_bitmap_is_full(&global_pv_affinity_mask)) ||
+                 (b_info->type != LIBXL_DOMAIN_TYPE_PV &&
+                  !libxl_bitmap_is_full(&global_hvm_affinity_mask))
+               )) {
+            b_info->num_vcpu_hard_affinity = b_info->max_vcpus;
+            b_info->vcpu_hard_affinity =
+                xmalloc(b_info->max_vcpus * sizeof(libxl_bitmap));
+
+            for (i = 0; i < b_info->num_vcpu_hard_affinity; i++) {
+                libxl_bitmap *m = &b_info->vcpu_hard_affinity[i];
+                libxl_bitmap_init(m);
+                libxl_cpu_bitmap_alloc(ctx, m, 0);
+                libxl_bitmap_set_any(m);
+            }
+        }
+
+        apply_global_affinity_masks(b_info->type,
+                                    b_info->vcpu_hard_affinity,
+                                    b_info->num_vcpu_hard_affinity);
+    }
+
     if (migrate_fd >= 0) {
         if (d_config.c_info.name) {
             /* when we receive a domain we get its name from the config
@@ -1124,7 +1154,7 @@ int main_create(int argc, char **argv)
     const char *filename = NULL;
     struct domain_create dom_info;
     int paused = 0, debug = 0, daemonize = 1, console_autoconnect = 0,
-        quiet = 0, monitor = 1, vnc = 0, vncautopass = 0;
+        quiet = 0, monitor = 1, vnc = 0, vncautopass = 0, ignore_masks = 0;
     int opt, rc;
     static struct option opts[] = {
         {"dryrun", 0, 0, 'n'},
@@ -1132,6 +1162,7 @@ int main_create(int argc, char **argv)
         {"defconfig", 1, 0, 'f'},
         {"vncviewer", 0, 0, 'V'},
         {"vncviewer-autopass", 0, 0, 'A'},
+        {"ignore-global-affinity-masks", 0, 0, 'i'},
         COMMON_LONG_OPTS
     };
 
@@ -1142,7 +1173,7 @@ int main_create(int argc, char **argv)
         argc--; argv++;
     }
 
-    SWITCH_FOREACH_OPT(opt, "Fnqf:pcdeVA", opts, "create", 0) {
+    SWITCH_FOREACH_OPT(opt, "Fnqf:pcdeVAi", opts, "create", 0) {
     case 'f':
         filename = optarg;
         break;
@@ -1174,6 +1205,9 @@ int main_create(int argc, char **argv)
     case 'A':
         vnc = vncautopass = 1;
         break;
+    case 'i':
+        ignore_masks = 1;
+        break;
     }
 
     memset(&dom_info, 0, sizeof(dom_info));
@@ -1203,6 +1237,7 @@ int main_create(int argc, char **argv)
     dom_info.vnc = vnc;
     dom_info.vncautopass = vncautopass;
     dom_info.console_autoconnect = console_autoconnect;
+    dom_info.ignore_global_affinity_masks = ignore_masks;
 
     rc = create_domain(&dom_info);
     if (rc < 0) {
--
generated by git-patchbot for /home/xen/git/xen.git#staging-4.9

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.