
[xen staging-4.17] locking: attempt to ensure lock wrappers are always inline



commit 2cc5e57be680a516aa5cdef4281856d09b9d0ea6
Author:     Roger Pau Monné <roger.pau@xxxxxxxxxx>
AuthorDate: Mon Mar 4 14:29:36 2024 +0100
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Tue Mar 12 16:13:03 2024 +0000

    locking: attempt to ensure lock wrappers are always inline
    
    In order to prevent the locking speculation barriers from being inside of
    `call`ed functions that could be speculatively bypassed.
    
    While there also add an extra locking barrier to _mm_write_lock() in the branch
    taken when the lock is already held.
    
    Note some functions are switched to use the unsafe variants (without speculation
    barrier) of the locking primitives, but a speculation barrier is always added
    to the exposed public lock wrapping helper.  That's the case with
    sched_spin_lock_double() or pcidevs_lock() for example.
    
    This is part of XSA-453 / CVE-2024-2193
    
    Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
    (cherry picked from commit 197ecd838a2aaf959a469df3696d4559c4f8b762)
---
 xen/arch/x86/hvm/vpt.c         | 10 +++++++---
 xen/arch/x86/include/asm/irq.h |  1 +
 xen/arch/x86/mm/mm-locks.h     | 28 +++++++++++++++-------------
 xen/arch/x86/mm/p2m-pod.c      |  2 +-
 xen/common/event_channel.c     |  5 +++--
 xen/common/grant_table.c       |  6 +++---
 xen/common/sched/core.c        | 19 ++++++++++++-------
 xen/common/sched/private.h     | 26 ++++++++++++++++++++++++--
 xen/common/timer.c             |  8 +++++---
 xen/drivers/passthrough/pci.c  |  5 +++--
 xen/include/xen/event.h        |  4 ++--
 xen/include/xen/pci.h          |  8 ++++++--
 12 files changed, 82 insertions(+), 40 deletions(-)
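
To make the shape of the change easier to follow, here is a minimal sketch of
the pattern the patch applies, modelled on the pcidevs_lock() hunk below.  It
assumes the Xen tree's spinlock and speculation-barrier primitives
(spinlock_t, SPIN_LOCK_UNLOCKED, _spin_lock(), block_lock_speculation(),
always_inline) are in scope; the demo_lock()/demo_lock_unsafe() names are
hypothetical stand-ins and are not part of the patch.

    /* Sketch only, not Xen code: speculation-unsafe body + inline wrapper. */
    static spinlock_t demo_lock_obj = SPIN_LOCK_UNLOCKED;

    /*
     * Out-of-line body: takes the lock with the speculation-unsafe
     * primitive, so no barrier is emitted inside this (callable) function.
     */
    void demo_lock_unsafe(void)
    {
        _spin_lock(&demo_lock_obj);
    }

    /*
     * Public wrapper: forced inline so the speculation barrier lands in the
     * caller's body, after the lock is acquired, rather than behind a `call`
     * that could be speculatively bypassed.
     */
    static always_inline void demo_lock(void)
    {
        demo_lock_unsafe();
        block_lock_speculation();
    }

Callers keep using the public wrapper (demo_lock()); only the internal body is
speculation-unsafe.  This is the same split the patch makes for
pcidevs_lock()/pcidevs_lock_unsafe() and for sched_spin_lock_double().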

diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
index cb1d81bf9e..66f1095245 100644
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -161,7 +161,7 @@ static int pt_irq_masked(struct periodic_time *pt)
  * pt->vcpu field, because another thread holding the pt_migrate lock
  * may already be spinning waiting for your vcpu lock.
  */
-static void pt_vcpu_lock(struct vcpu *v)
+static always_inline void pt_vcpu_lock(struct vcpu *v)
 {
     spin_lock(&v->arch.hvm.tm_lock);
 }
@@ -180,9 +180,13 @@ static void pt_vcpu_unlock(struct vcpu *v)
  * need to take an additional lock that protects against pt->vcpu
  * changing.
  */
-static void pt_lock(struct periodic_time *pt)
+static always_inline void pt_lock(struct periodic_time *pt)
 {
-    read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
+    /*
+     * Use the speculation unsafe variant for the first lock, as the following
+     * lock taking helper already includes a speculation barrier.
+     */
+    _read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
     spin_lock(&pt->vcpu->arch.hvm.tm_lock);
 }
 
diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h
index f6a0207a80..823d627fd0 100644
--- a/xen/arch/x86/include/asm/irq.h
+++ b/xen/arch/x86/include/asm/irq.h
@@ -178,6 +178,7 @@ void cf_check irq_complete_move(struct irq_desc *);
 
 extern struct irq_desc *irq_desc;
 
+/* Not speculation safe, only used for AP bringup. */
 void lock_vector_lock(void);
 void unlock_vector_lock(void);
 
diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
index c1523aeccf..265239c49f 100644
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -86,8 +86,8 @@ static inline void _set_lock_level(int l)
     this_cpu(mm_lock_level) = l;
 }
 
-static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
-                            const char *func, int level, int rec)
+static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l,
+                                   const char *func, int level, int rec)
 {
     if ( !((mm_locked_by_me(l)) && rec) )
         _check_lock_level(d, level);
@@ -137,8 +137,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l)
     return (l->locker == get_processor_id());
 }
 
-static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
-                                  const char *func, int level)
+static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+                                         const char *func, int level)
 {
     if ( !mm_write_locked_by_me(l) )
     {
@@ -149,6 +149,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
         l->unlock_level = _get_lock_level();
         _set_lock_level(_lock_level(d, level));
     }
+    else
+        block_speculation();
     l->recurse_count++;
 }
 
@@ -162,8 +164,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
     percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
 }
 
-static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
-                                 int level)
+static always_inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
+                                        int level)
 {
     _check_lock_level(d, level);
     percpu_read_lock(p2m_percpu_rwlock, &l->lock);
@@ -178,15 +180,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l)
 
 /* This wrapper uses the line number to express the locking order below */
 #define declare_mm_lock(name)                                                 \
-    static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l,   \
-                                      const char *func, int rec)              \
+    static always_inline void mm_lock_##name(                                 \
+        const struct domain *d, mm_lock_t *l, const char *func, int rec)      \
     { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
 #define declare_mm_rwlock(name)                                               \
-    static inline void mm_write_lock_##name(const struct domain *d,           \
-                                            mm_rwlock_t *l, const char *func) \
+    static always_inline void mm_write_lock_##name(                           \
+        const struct domain *d, mm_rwlock_t *l, const char *func)             \
     { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); }                     \
-    static inline void mm_read_lock_##name(const struct domain *d,            \
-                                           mm_rwlock_t *l)                    \
+    static always_inline void mm_read_lock_##name(const struct domain *d,     \
+                                                  mm_rwlock_t *l)             \
     { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
 /* These capture the name of the calling function */
 #define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
@@ -321,7 +323,7 @@ declare_mm_lock(altp2mlist)
 #define MM_LOCK_ORDER_altp2m                 40
 declare_mm_rwlock(altp2m);
 
-static inline void p2m_lock(struct p2m_domain *p)
+static always_inline void p2m_lock(struct p2m_domain *p)
 {
     if ( p2m_is_altp2m(p) )
         mm_write_lock(altp2m, p->domain, &p->lock);
diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index fc110506dc..99dbcb3101 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -36,7 +36,7 @@
 #define superpage_aligned(_x)  (((_x)&(SUPERPAGE_PAGES-1))==0)
 
 /* Enforce lock ordering when grabbing the "external" page_alloc lock */
-static inline void lock_page_alloc(struct p2m_domain *p2m)
+static always_inline void lock_page_alloc(struct p2m_domain *p2m)
 {
     page_alloc_mm_pre_lock(p2m->domain);
     spin_lock(&(p2m->domain->page_alloc_lock));
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index f5e0b12d15..dada9f15f5 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -62,7 +62,7 @@
  * just assume the event channel is free or unbound at the moment when the
  * evtchn_read_trylock() returns false.
  */
-static inline void evtchn_write_lock(struct evtchn *evtchn)
+static always_inline void evtchn_write_lock(struct evtchn *evtchn)
 {
     write_lock(&evtchn->lock);
 
@@ -364,7 +364,8 @@ int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port)
     return rc;
 }
 
-static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
+static always_inline void double_evtchn_lock(struct evtchn *lchn,
+                                             struct evtchn *rchn)
 {
     ASSERT(lchn != rchn);
 
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index ee7cc496b8..62a8685cd5 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -410,7 +410,7 @@ static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn)
 
 static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);
 
-static inline void grant_read_lock(struct grant_table *gt)
+static always_inline void grant_read_lock(struct grant_table *gt)
 {
     percpu_read_lock(grant_rwlock, &gt->lock);
 }
@@ -420,7 +420,7 @@ static inline void grant_read_unlock(struct grant_table *gt)
     percpu_read_unlock(grant_rwlock, &gt->lock);
 }
 
-static inline void grant_write_lock(struct grant_table *gt)
+static always_inline void grant_write_lock(struct grant_table *gt)
 {
     percpu_write_lock(grant_rwlock, &gt->lock);
 }
@@ -457,7 +457,7 @@ nr_active_grant_frames(struct grant_table *gt)
     return num_act_frames_from_sha_frames(nr_grant_frames(gt));
 }
 
-static inline struct active_grant_entry *
+static always_inline struct active_grant_entry *
 active_entry_acquire(struct grant_table *t, grant_ref_t e)
 {
     struct active_grant_entry *act;
diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 078beb1adb..29bbab5ac6 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -348,23 +348,28 @@ uint64_t get_cpu_idle_time(unsigned int cpu)
  * This avoids dead- or live-locks when this code is running on both
  * cpus at the same time.
  */
-static void sched_spin_lock_double(spinlock_t *lock1, spinlock_t *lock2,
-                                   unsigned long *flags)
+static always_inline void sched_spin_lock_double(
+    spinlock_t *lock1, spinlock_t *lock2, unsigned long *flags)
 {
+    /*
+     * In order to avoid extra overhead, use the locking primitives without the
+     * speculation barrier, and introduce a single barrier here.
+     */
     if ( lock1 == lock2 )
     {
-        spin_lock_irqsave(lock1, *flags);
+        *flags = _spin_lock_irqsave(lock1);
     }
     else if ( lock1 < lock2 )
     {
-        spin_lock_irqsave(lock1, *flags);
-        spin_lock(lock2);
+        *flags = _spin_lock_irqsave(lock1);
+        _spin_lock(lock2);
     }
     else
     {
-        spin_lock_irqsave(lock2, *flags);
-        spin_lock(lock1);
+        *flags = _spin_lock_irqsave(lock2);
+        _spin_lock(lock1);
     }
+    block_lock_speculation();
 }
 
 static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2,
diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
index 0527a8c70d..24a93dd0c1 100644
--- a/xen/common/sched/private.h
+++ b/xen/common/sched/private.h
@@ -207,8 +207,24 @@ DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
 #define cpumask_scratch        (&this_cpu(cpumask_scratch))
 #define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))
 
+/*
+ * Deal with _spin_lock_irqsave() returning the flags value instead of storing
+ * it in a passed parameter.
+ */
+#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock)
+#define _sched_spinlock1(lock, irq, arg) ({ \
+    BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \
+    (arg) = _spin_lock##irq(lock); \
+})
+
+#define _sched_spinlock__(nr) _sched_spinlock ## nr
+#define _sched_spinlock_(nr)  _sched_spinlock__(nr)
+#define _sched_spinlock(lock, irq, args...) \
+    _sched_spinlock_(count_args(args))(lock, irq, ## args)
+
 #define sched_lock(kind, param, cpu, irq, arg...) \
-static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+static always_inline spinlock_t \
+*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
 { \
     for ( ; ; ) \
     { \
@@ -220,10 +236,16 @@ static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
          * \
          * It may also be the case that v->processor may change but the \
          * lock may be the same; this will succeed in that case. \
+         * \
+         * Use the speculation unsafe locking helper, there's a speculation \
+         * barrier before returning to the caller. \
          */ \
-        spin_lock##irq(lock, ## arg); \
+        _sched_spinlock(lock, irq, ## arg); \
         if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \
+        { \
+            block_lock_speculation(); \
             return lock; \
+        } \
         spin_unlock##irq(lock, ## arg); \
     } \
 }
diff --git a/xen/common/timer.c b/xen/common/timer.c
index 9b5016d5ed..459668d417 100644
--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -240,7 +240,7 @@ static inline void deactivate_timer(struct timer *timer)
     list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
 }
 
-static inline bool_t timer_lock(struct timer *timer)
+static inline bool_t timer_lock_unsafe(struct timer *timer)
 {
     unsigned int cpu;
 
@@ -254,7 +254,8 @@ static inline bool_t timer_lock(struct timer *timer)
             rcu_read_unlock(&timer_cpu_read_lock);
             return 0;
         }
-        spin_lock(&per_cpu(timers, cpu).lock);
+        /* Use the speculation unsafe variant, the wrapper has the barrier. */
+        _spin_lock(&per_cpu(timers, cpu).lock);
         if ( likely(timer->cpu == cpu) )
             break;
         spin_unlock(&per_cpu(timers, cpu).lock);
@@ -267,8 +268,9 @@ static inline bool_t timer_lock(struct timer *timer)
 #define timer_lock_irqsave(t, flags) ({         \
     bool_t __x;                                 \
     local_irq_save(flags);                      \
-    if ( !(__x = timer_lock(t)) )               \
+    if ( !(__x = timer_lock_unsafe(t)) )        \
         local_irq_restore(flags);               \
+    block_lock_speculation();                   \
     __x;                                        \
 })
 
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 8c62b14d19..1b3d285166 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -52,9 +52,10 @@ struct pci_seg {
 
 static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
 
-void pcidevs_lock(void)
+/* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */
+void pcidevs_lock_unsafe(void)
 {
-    spin_lock_recursive(&_pcidevs_lock);
+    _spin_lock_recursive(&_pcidevs_lock);
 }
 
 void pcidevs_unlock(void)
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 8eae9984a9..dd96e84c69 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -114,12 +114,12 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
 #define bucket_from_port(d, p) \
     ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET])
 
-static inline void evtchn_read_lock(struct evtchn *evtchn)
+static always_inline void evtchn_read_lock(struct evtchn *evtchn)
 {
     read_lock(&evtchn->lock);
 }
 
-static inline bool evtchn_read_trylock(struct evtchn *evtchn)
+static always_inline bool evtchn_read_trylock(struct evtchn *evtchn)
 {
     return read_trylock(&evtchn->lock);
 }
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 5975ca2f30..b373f139d1 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -155,8 +155,12 @@ struct pci_dev {
  * devices, it also sync the access to the msi capability that is not
  * interrupt handling related (the mask bit register).
  */
-
-void pcidevs_lock(void);
+void pcidevs_lock_unsafe(void);
+static always_inline void pcidevs_lock(void)
+{
+    pcidevs_lock_unsafe();
+    block_lock_speculation();
+}
 void pcidevs_unlock(void);
 bool_t __must_check pcidevs_locked(void);
 
--
generated by git-patchbot for /home/xen/git/xen.git#staging-4.17



 

