[xen staging-4.14] x86/shadow: tolerate failure in shadow_prealloc()
commit b8f4a5de683efbe402db65483d845573c30dbb3f
Author:     Roger Pau Monné <roger.pau@xxxxxxxxxx>
AuthorDate: Tue Oct 11 15:36:21 2022 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Oct 11 15:36:21 2022 +0200

    x86/shadow: tolerate failure in shadow_prealloc()

    Prevent _shadow_prealloc() from calling BUG() when unable to fulfill
    the pre-allocation, and instead return true/false.  Modify
    shadow_prealloc() to crash the domain on allocation failure (if the
    domain is not already dying), as shadow cannot operate normally after
    that.  Modify callers to also gracefully handle {_,}shadow_prealloc()
    failing to fulfill the request.

    Note this in turn requires adjusting the callers of
    sh_make_monitor_table() also to handle it returning INVALID_MFN.

    sh_update_paging_modes() is also modified to add additional error
    paths in case of allocation failure; some of those will return with
    null monitor page tables (and the domain likely crashed).  This is no
    different from the current error paths, but the newly introduced ones
    are more likely to trigger.

    The now added failure points in sh_update_paging_modes() also require
    that on some error return paths the previous structures are cleared,
    and thus the monitor table is null.

    While there, adjust the 'type' parameter of shadow_prealloc() to
    unsigned int rather than u32.

    This is part of CVE-2022-33746 / XSA-410.

    Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Acked-by: Tim Deegan <tim@xxxxxxx>
    master commit: b7f93c6afb12b6061e2d19de2f39ea09b569ac68
    master date: 2022-10-11 14:22:53 +0200
---
 xen/arch/x86/mm/shadow/common.c  | 62 ++++++++++++++++++++++++++++++----------
 xen/arch/x86/mm/shadow/multi.c   | 21 ++++++++++----
 xen/arch/x86/mm/shadow/private.h |  3 +-
 3 files changed, 65 insertions(+), 21 deletions(-)
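In essence, the patch below replaces a hard BUG() on shadow-pool exhaustion
with a boolean contract that every caller has to check and propagate.  The
following standalone C sketch models that contract with a toy page pool; all
names here (pool_prealloc(), make_table(), MUST_CHECK) are invented for
illustration and are not the functions this patch touches:

/*
 * A minimal sketch of the pattern, NOT Xen code: a toy pool whose
 * prealloc reports failure instead of calling BUG(), plus a caller
 * that propagates the error.  All identifiers are hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

#define MUST_CHECK __attribute__((warn_unused_result))

struct pool { unsigned int free_pages; };

/* Before the patch this would BUG() on exhaustion; now it reports. */
static bool MUST_CHECK pool_prealloc(struct pool *p, unsigned int pages)
{
    if ( p->free_pages >= pages )
        return true;
    /* ... reclaim/unpin attempts would go here ... */
    return false;                   /* was: BUG(); */
}

/* Callers gain an error path instead of relying on an abort. */
static int make_table(struct pool *p)
{
    if ( !pool_prealloc(p, 4) )
        return -1;                  /* analogous to INVALID_MFN / -ENOMEM */
    p->free_pages -= 4;
    return 0;
}

int main(void)
{
    struct pool p = { .free_pages = 2 };

    if ( make_table(&p) < 0 )
        fprintf(stderr, "prealloc failed: the domain would be crashed here\n");
    return 0;
}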
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 4436ea2c51..6f71636746 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
 #include <asm/shadow.h>
 #include <asm/hvm/ioreq.h>
 #include <xen/numa.h>
+#include <public/sched.h>
 #include "private.h"
 
 DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags);
@@ -927,14 +928,15 @@ static inline void trace_shadow_prealloc_unpin(struct domain *d, mfn_t smfn)
 
 /* Make sure there are at least count order-sized pages
  * available in the shadow page pool. */
-static void _shadow_prealloc(struct domain *d, unsigned int pages)
+static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages)
 {
     struct vcpu *v;
     struct page_info *sp, *t;
     mfn_t smfn;
     int i;
 
-    if ( d->arch.paging.shadow.free_pages >= pages ) return;
+    if ( d->arch.paging.shadow.free_pages >= pages )
+        return true;
 
     /* Shouldn't have enabled shadows if we've no vcpus. */
     ASSERT(d->vcpu && d->vcpu[0]);
@@ -950,7 +952,8 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages)
         sh_unpin(d, smfn);
 
         /* See if that freed up enough space */
-        if ( d->arch.paging.shadow.free_pages >= pages ) return;
+        if ( d->arch.paging.shadow.free_pages >= pages )
+            return true;
     }
 
     /* Stage two: all shadow pages are in use in hierarchies that are
@@ -971,7 +974,7 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages)
                 if ( d->arch.paging.shadow.free_pages >= pages )
                 {
                     guest_flush_tlb_mask(d, d->dirty_cpumask);
-                    return;
+                    return true;
                 }
             }
         }
@@ -984,7 +987,12 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages)
            d->arch.paging.shadow.total_pages,
            d->arch.paging.shadow.free_pages,
            d->arch.paging.shadow.p2m_pages);
-    BUG();
+
+    ASSERT(d->is_dying);
+
+    guest_flush_tlb_mask(d, d->dirty_cpumask);
+
+    return false;
 }
 
 /* Make sure there are at least count pages of the order according to
@@ -992,9 +1000,19 @@
  * This must be called before any calls to shadow_alloc().  Since this
  * will free existing shadows to make room, it must be called early enough
  * to avoid freeing shadows that the caller is currently working on. */
-void shadow_prealloc(struct domain *d, u32 type, unsigned int count)
+bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count)
 {
-    return _shadow_prealloc(d, shadow_size(type) * count);
+    bool ret = _shadow_prealloc(d, shadow_size(type) * count);
+
+    if ( !ret && !d->is_dying &&
+         (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
+        /*
+         * Failing to allocate memory required for shadow usage can only result in
+         * a domain crash, do it here rather that relying on every caller to do it.
+         */
+        domain_crash(d);
+
+    return ret;
 }
 
 /* Deliberately free all the memory we can: this will tear down all of
@@ -1211,7 +1229,7 @@ void shadow_free(struct domain *d, mfn_t smfn)
 static struct page_info *
 shadow_alloc_p2m_page(struct domain *d)
 {
-    struct page_info *pg;
+    struct page_info *pg = NULL;
 
     /* This is called both from the p2m code (which never holds the
      * paging lock) and the log-dirty code (which always does. */
@@ -1229,16 +1247,18 @@ shadow_alloc_p2m_page(struct domain *d)
                    d->arch.paging.shadow.p2m_pages,
                    shadow_min_acceptable_pages(d));
         }
-        paging_unlock(d);
-        return NULL;
+        goto out;
     }
 
-    shadow_prealloc(d, SH_type_p2m_table, 1);
+    if ( !shadow_prealloc(d, SH_type_p2m_table, 1) )
+        goto out;
+
     pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0));
     d->arch.paging.shadow.p2m_pages++;
     d->arch.paging.shadow.total_pages--;
 
     ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
 
+ out:
     paging_unlock(d);
     return pg;
@@ -1329,7 +1349,9 @@ int shadow_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
     else if ( d->arch.paging.shadow.total_pages > pages )
     {
         /* Need to return memory to domheap */
-        _shadow_prealloc(d, 1);
+        if ( !_shadow_prealloc(d, 1) )
+            return -ENOMEM;
+
         sp = page_list_remove_head(&d->arch.paging.shadow.freelist);
         ASSERT(sp);
         /*
@@ -2397,12 +2419,13 @@ static void sh_update_paging_modes(struct vcpu *v)
     if ( mfn_eq(v->arch.paging.shadow.oos_snapshot[0], INVALID_MFN) )
     {
         int i;
+
+        if ( !shadow_prealloc(d, SH_type_oos_snapshot, SHADOW_OOS_PAGES) )
+            return;
+
         for(i = 0; i < SHADOW_OOS_PAGES; i++)
-        {
-            shadow_prealloc(d, SH_type_oos_snapshot, 1);
             v->arch.paging.shadow.oos_snapshot[i] =
                 shadow_alloc(d, SH_type_oos_snapshot, 0);
-        }
     }
 #endif /* OOS */
 
@@ -2464,6 +2487,10 @@ static void sh_update_paging_modes(struct vcpu *v)
         if ( pagetable_is_null(v->arch.hvm.monitor_table) )
         {
             mfn_t mmfn = v->arch.paging.mode->shadow.make_monitor_table(v);
+
+            if ( mfn_eq(mmfn, INVALID_MFN) )
+                return;
+
             v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn);
             make_cr3(v, mmfn);
             hvm_update_host_cr3(v);
@@ -2501,6 +2528,11 @@ static void sh_update_paging_modes(struct vcpu *v)
                 old_mfn = pagetable_get_mfn(v->arch.hvm.monitor_table);
                 v->arch.hvm.monitor_table = pagetable_null();
                 new_mfn = v->arch.paging.mode->shadow.make_monitor_table(v);
+                if ( mfn_eq(new_mfn, INVALID_MFN) )
+                {
+                    old_mode->shadow.destroy_monitor_table(v, old_mfn);
+                    return;
+                }
                 v->arch.hvm.monitor_table = pagetable_from_mfn(new_mfn);
                 SHADOW_PRINTK("new monitor table %"PRI_mfn "\n",
                               mfn_x(new_mfn));
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index c129b8103e..aaf56d295e 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1535,7 +1535,8 @@ sh_make_monitor_table(struct vcpu *v)
     ASSERT(pagetable_get_pfn(v->arch.hvm.monitor_table) == 0);
 
     /* Guarantee we can get the memory we need */
-    shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS);
+    if ( !shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS) )
+        return INVALID_MFN;
 
     {
         mfn_t m4mfn;
@@ -3067,9 +3068,14 @@ static int sh_page_fault(struct vcpu *v,
      * Preallocate shadow pages *before* removing writable accesses
      * otherwhise an OOS L1 might be demoted and promoted again with
      * writable mappings. */
-    shadow_prealloc(d,
-                    SH_type_l1_shadow,
-                    GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1);
+    if ( !shadow_prealloc(d, SH_type_l1_shadow,
+                          GUEST_PAGING_LEVELS < 4
+                          ? 1 : GUEST_PAGING_LEVELS - 1) )
+    {
+        paging_unlock(d);
+        put_gfn(d, gfn_x(gfn));
+        return 0;
+    }
 
     rc = gw_remove_write_accesses(v, va, &gw);
 
@@ -3864,7 +3870,12 @@ sh_set_toplevel_shadow(struct vcpu *v,
     if ( !mfn_valid(smfn) )
     {
         /* Make sure there's enough free shadow memory. */
-        shadow_prealloc(d, root_type, 1);
+        if ( !shadow_prealloc(d, root_type, 1) )
+        {
+            new_entry = pagetable_null();
+            goto install_new_entry;
+        }
+
         /* Shadow the page. */
         smfn = sh_make_shadow(v, gmfn, root_type);
     }
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index 3fd3f0617a..e2100f0f34 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -351,7 +351,8 @@ void shadow_promote(struct domain *d, mfn_t gmfn, u32 type);
 void shadow_demote(struct domain *d, mfn_t gmfn, u32 type);
 
 /* Shadow page allocation functions */
-void shadow_prealloc(struct domain *d, u32 shadow_type, unsigned int count);
+bool __must_check shadow_prealloc(struct domain *d, unsigned int shadow_type,
+                                  unsigned int count);
 mfn_t shadow_alloc(struct domain *d,
                     u32 shadow_type,
                     unsigned long backpointer);
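The most delicate caller adjustment is the mode-change path in
sh_update_paging_modes() above: the old monitor table is detached from the
vCPU before the replacement is built, so a failed sh_make_monitor_table()
must free the detached table and leave the slot null rather than dangling.
Below is a compilable toy model of that rollback shape, assuming an invented
handle type and pool; none of these names are the real Xen interfaces:

/*
 * A toy model of the rollback in the monitor-table swap, NOT the Xen
 * interfaces: handle type, pool and function names are all invented.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INVALID_TABLE UINT64_MAX

static unsigned int free_pages;       /* toy pool, empty so make fails */

static uint64_t make_table(void)      /* stands in for make_monitor_table */
{
    if ( free_pages < 1 )
        return INVALID_TABLE;         /* prealloc failed */
    free_pages--;
    return 42;                        /* some valid handle */
}

static void destroy_table(uint64_t t) /* stands in for destroy_monitor_table */
{
    if ( t != INVALID_TABLE )
        free_pages++;
}

/*
 * The old table is detached before the new one is built, so on failure
 * it must still be freed and the slot left null, never dangling.
 */
static bool replace_monitor_table(uint64_t *slot)
{
    uint64_t old = *slot;
    uint64_t fresh;

    *slot = INVALID_TABLE;            /* detach first, as the patch does */

    fresh = make_table();
    if ( fresh == INVALID_TABLE )
    {
        destroy_table(old);           /* roll back: free the old table */
        return false;                 /* caller observes a null slot */
    }

    *slot = fresh;
    destroy_table(old);
    return true;
}

int main(void)
{
    uint64_t slot = 7;                /* pretend an existing table */

    free_pages = 0;                   /* force the failure path */
    if ( !replace_monitor_table(&slot) )
        fprintf(stderr, "swap failed; slot is null, domain would crash\n");
    return 0;
}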
--
generated by git-patchbot for /home/xen/git/xen.git#staging-4.14