Xen project Mailing List

Re: [Xen-devel] [PATCH v9 13/15] x86/microcode: Synchronize late microcode loading

Date: Thu, 29 Aug 2019 14:06:39 +0200

Cc: Kevin Tian <kevin.tian@xxxxxxxxx>, Ashok Raj <ashok.raj@xxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Jun Nakajima <jun.nakajima@xxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx, Thomas Gleixner <tglx@xxxxxxxxxxxxx>, Borislav Petkov <bp@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>

Delivery-date: Thu, 29 Aug 2019 12:06:45 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 19.08.2019 03:25, Chao Gao wrote:
> @@ -232,6 +276,34 @@ bool microcode_update_cache(struct microcode_patch
> *patch)
>      return true;
>  }
>
> +/* Wait for a condition to be met with a timeout (us). */
> +static int wait_for_condition(int (*func)(void *data), void *data,
> +                              unsigned int timeout)
> +{
> +    while ( !func(data) )
> +    {
> +        if ( !timeout-- )
> +        {
> +            printk("CPU%u: Timeout in %pS\n",
> +                   smp_processor_id(), __builtin_return_address(0));
> +            return -EBUSY;
> +        }
> +        udelay(1);
> +    }
> +
> +    return 0;
> +}
> +
> +static int wait_cpu_callin(void *nr)
> +{
> +    return cpumask_weight(&cpu_callin_map) >= (unsigned long)nr;
> +}
> +
> +static int wait_cpu_callout(void *nr)
> +{
> +    return atomic_read(&cpu_out) >= (unsigned long)nr;
> +}

Since wait_for_condition() is used with only these two functions as callbacks, they should imo return bool and take const void *.

> @@ -265,37 +337,155 @@ static int microcode_update_cpu(const struct
> microcode_patch *patch)
>      return err;
>  }
>
> -static long do_microcode_update(void *patch)
> +static int slave_thread_fn(void)
> +{
> +    unsigned int cpu = smp_processor_id();
> +    unsigned int master = cpumask_first(this_cpu(cpu_sibling_mask));
> +
> +    while ( loading_state != LOADING_CALLIN )
> +        cpu_relax();
> +
> +    cpumask_set_cpu(cpu, &cpu_callin_map);
> +
> +    while ( loading_state != LOADING_EXIT )
> +        cpu_relax();
> +
> +    /* Copy update revision from the "master" thread. */
> +    this_cpu(cpu_sig).rev = per_cpu(cpu_sig, master).rev;
> +
> +    return 0;
> +}
> +
> +static int master_thread_fn(const struct microcode_patch *patch)
> +{
> +    unsigned int cpu = smp_processor_id();
> +    int ret = 0;
> +
> +    while ( loading_state != LOADING_CALLIN )
> +        cpu_relax();
> +
> +    cpumask_set_cpu(cpu, &cpu_callin_map);
> +
> +    while ( loading_state != LOADING_ENTER )
> +        cpu_relax();
> +
> +    /*
> +     * If an error happened, control thread would set 'loading_state'
> +     * to LOADING_EXIT. Don't perform ucode loading for this case
> +     */
> +    if ( loading_state == LOADING_EXIT )
> +        return ret;

Even if the producer transitions this through ENTER to EXIT, the observer here may never get to see the ENTER state, and hence never exit the loop above. You want either < ENTER or == CALLIN.

> +    ret = microcode_ops->apply_microcode(patch);
> +    if ( !ret )
> +        atomic_inc(&cpu_updated);
> +    atomic_inc(&cpu_out);
> +
> +    while ( loading_state != LOADING_EXIT )
> +        cpu_relax();
> +
> +    return ret;
> +}

As a cosmetic remark, I don't think "master" and "slave" are suitable terms here. "primary" and "secondary" would imo come closer to what the threads' relationship is.

> +static int control_thread_fn(const struct microcode_patch *patch)
>  {
> -    unsigned int cpu;
> +    unsigned int cpu = smp_processor_id(), done;
> +    unsigned long tick;
> +    int ret;
>
> -    /* Store the patch after a successful loading */
> -    if ( !microcode_update_cpu(patch) && patch )
> +    /* Allow threads to call in */
> +    loading_state = LOADING_CALLIN;
> +    smp_mb();

Why not just smp_wmb()? (Same further down then.)

> +    cpumask_set_cpu(cpu, &cpu_callin_map);
> +
> +    /* Waiting for all threads calling in */
> +    ret = wait_for_condition(wait_cpu_callin,
> +                             (void *)(unsigned long)num_online_cpus(),
> +                             MICROCODE_CALLIN_TIMEOUT_US);
> +    if ( ret ) {

Misplaced brace.

> +static int do_microcode_update(void *patch)

const?

> @@ -326,19 +523,67 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void)
> buf, unsigned long len)
>      {
>          ret = PTR_ERR(patch);
>          printk(XENLOG_INFO "Parsing microcode blob error %d\n", ret);
> -        goto free;
> +        goto put;
>      }
>
>      if ( !patch )
>      {
>          printk(XENLOG_INFO "No ucode found. Update aborted!\n");
>          ret = -EINVAL;
> -        goto free;
> +        goto put;
> +    }
> +
> +    cpumask_clear(&cpu_callin_map);
> +    atomic_set(&cpu_out, 0);
> +    atomic_set(&cpu_updated, 0);
> +    loading_state = LOADING_PREPARE;
> +
> +    /* Calculate the number of online CPU core */
> +    nr_cores = 0;
> +    for_each_online_cpu(cpu)
> +        if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
> +            nr_cores++;
> +
> +    printk(XENLOG_INFO "%u cores are to update their microcode\n", nr_cores);
> +
> +    /*
> +     * We intend to disable interrupt for long time, which may lead to
> +     * watchdog timeout.
> +     */
> +    watchdog_disable();
> +    /*
> +     * Late loading dance. Why the heavy-handed stop_machine effort?
> +     *
> +     * - HT siblings must be idle and not execute other code while the other
> +     *   sibling is loading microcode in order to avoid any negative
> +     *   interactions cause by the loading.
> +     *
> +     * - In addition, microcode update on the cores must be serialized until
> +     *   this requirement can be relaxed in the future. Right now, this is
> +     *   conservative and good.
> +     */
> +    ret = stop_machine_run(do_microcode_update, patch, NR_CPUS);
> +    watchdog_enable();

Considering that stop_machine_run() doesn't itself disable the watchdog, did you consider having the control thread disable/enable the watchdog, thus shortening the period where it's not active?

> +    updated = atomic_read(&cpu_updated);
> +    if ( updated > 0 )
> +    {
> +        spin_lock(&microcode_mutex);
> +        microcode_update_cache(patch);
> +        spin_unlock(&microcode_mutex);
>      }
> +    else
> +        microcode_free_patch(patch);
>
> -    ret = continue_hypercall_on_cpu(cpumask_first(&cpu_online_map),
> -                                    do_microcode_update, patch);
> +    if ( updated && updated != nr_cores )
> +        printk(XENLOG_ERR "ERROR: Updating microcode succeeded on %u cores
> and failed\n"
> +               XENLOG_ERR "on other %u cores. A system with differing
> microcode \n"

Stray blank before newline.

> +               XENLOG_ERR "revisions is considered unstable. Please reboot
> and do not\n"
> +               XENLOG_ERR "load the microcode that triggersthis warning!\n",

Missing blank before "this".

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.