[Xen-devel] [PATCH RFC 13/44] x86/pt-shadow: Shadow L4 tables from 64bit PV guests
See the code comments for reasoning and the algorithm description.

This is a very simplistic algorithm, which comes with a substantial
performance overhead.  The algorithm will be improved in a later patch, once
more infrastructure is in place.

Some of the code (particularly in pt_maybe_shadow()) is structured oddly.
This is deliberate, to simplify the later algorithm improvement and to avoid
unnecessary code motion getting in the way of the logical change.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
v3:
 * Rebase over change to using ptsh
 * Rework to be as close to the eventual algorithm as possible, pending a
   map_domain_page() which is usable in context switch context.
---
 xen/arch/x86/mm.c                  |   5 +-
 xen/arch/x86/mm/shadow/multi.c     |   2 +
 xen/arch/x86/pv/mm.h               |  16 +++-
 xen/arch/x86/pv/pt-shadow.c        | 164 +++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/fixmap.h       |   1 +
 xen/include/asm-x86/pv/pt-shadow.h |  24 ++++++
 6 files changed, 209 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index f85ef6c..375565f 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -126,6 +126,7 @@
 #include <asm/hvm/grant_table.h>
 #include <asm/pv/grant_table.h>
 #include <asm/pv/mm.h>
+#include <asm/pv/pt-shadow.h>
 
 #include "pv/mm.h"
 
@@ -501,13 +502,15 @@ DEFINE_PER_CPU(unsigned long, curr_ptbase);
 
 void do_write_ptbase(struct vcpu *v, bool tlb_maintenance)
 {
-    unsigned long new_cr3 = v->arch.cr3;
+    unsigned long new_cr3;
     unsigned int cpu = smp_processor_id();
     unsigned long *this_curr_ptbase = &per_cpu(curr_ptbase, cpu);
 
     /* Check that %cr3 isn't being shuffled under our feet. */
     ASSERT(*this_curr_ptbase == read_cr3());
 
+    new_cr3 = pt_maybe_shadow(v);
+
     if ( tlb_maintenance )
         write_cr3(new_cr3);
     else
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index c4e954e..9c929ed 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -39,6 +39,7 @@ asm(".file \"" __OBJECT_FILE__ "\"");
 #include <asm/hvm/cacheattr.h>
 #include <asm/mtrr.h>
 #include <asm/guest_pt.h>
+#include <asm/pv/pt-shadow.h>
 #include <public/sched.h>
 #include "private.h"
 #include "types.h"
@@ -952,6 +953,7 @@ static int shadow_set_l4e(struct domain *d,
 
     /* Write the new entry */
     shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
+    pt_shadow_l4_write(d, mfn_to_page(sl4mfn), pgentry_ptr_to_slot(sl4e));
     flags |= SHADOW_SET_CHANGED;
 
     if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
diff --git a/xen/arch/x86/pv/mm.h b/xen/arch/x86/pv/mm.h
index a10b09a..7c66ca7 100644
--- a/xen/arch/x86/pv/mm.h
+++ b/xen/arch/x86/pv/mm.h
@@ -1,6 +1,8 @@
 #ifndef __PV_MM_H__
 #define __PV_MM_H__
 
+#include <asm/pv/pt-shadow.h>
+
 l1_pgentry_t *map_guest_l1e(unsigned long linear, mfn_t *gl1mfn);
 
 int new_guest_cr3(mfn_t mfn);
@@ -38,7 +40,7 @@ static inline l1_pgentry_t guest_get_eff_l1e(unsigned long linear)
  */
 static inline bool update_intpte(intpte_t *p, intpte_t old, intpte_t new,
                                  unsigned long mfn, struct vcpu *v,
-                                 bool preserve_ad)
+                                 bool preserve_ad, unsigned int level)
 {
     bool rv = true;
 
@@ -77,6 +79,11 @@ static inline bool update_intpte(intpte_t *p, intpte_t old, intpte_t new,
             old = t;
         }
     }
+
+    if ( level == 4 )
+        pt_shadow_l4_write(v->domain, mfn_to_page(mfn),
+                           pgentry_ptr_to_slot(p));
+
     return rv;
 }
 
@@ -87,7 +94,12 @@
 #define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v,_ad)                         \
     update_intpte(&_t ## e_get_intpte(*(_p)),                       \
                   _t ## e_get_intpte(_o), _t ## e_get_intpte(_n),   \
-                  (_m), (_v), (_ad))
+                  (_m), (_v), (_ad), _t ## _LEVEL)
+
+#define l1_LEVEL 1
+#define l2_LEVEL 2
+#define l3_LEVEL 3
+#define l4_LEVEL 4
 
 static inline l1_pgentry_t adjust_guest_l1e(l1_pgentry_t l1e,
                                             const struct domain *d)
diff --git a/xen/arch/x86/pv/pt-shadow.c b/xen/arch/x86/pv/pt-shadow.c
index 7db8efb..46a0251 100644
--- a/xen/arch/x86/pv/pt-shadow.c
+++ b/xen/arch/x86/pv/pt-shadow.c
@@ -22,8 +22,32 @@
 #include <xen/mm.h>
 #include <xen/numa.h>
 
+#include <asm/fixmap.h>
 #include <asm/pv/pt-shadow.h>
 
+/*
+ * To use percpu linear ranges, we require that no two pcpus have %cr3
+ * pointing at the same L4 pagetable at the same time.
+ *
+ * Guests however might choose to use the same L4 pagetable on multiple vcpus
+ * at once, e.g. concurrently scheduling two threads from the same process.
+ * In practice, all HVM guests, and 32bit PV guests run on Xen-provided
+ * per-vcpu monitor tables, so it is only 64bit PV guests which are an issue.
+ *
+ * To resolve the issue, we shadow L4 pagetables from 64bit PV guests when
+ * they are in context.
+ *
+ * The algorithm is fairly simple.
+ *
+ *  - When a pcpu is switching to a new vcpu cr3 and shadowing is necessary,
+ *    perform a full 4K copy of the guest's frame into a percpu frame, and run
+ *    on that.
+ *  - When a write to a guest's L4 pagetable occurs, the update must be
+ *    propagated to all existing shadows.  An IPI is sent to the domain's
+ *    dirty mask indicating which frame/slot was updated, and each pcpu
+ *    checks to see whether it needs to sync the update into its shadow.
+ */
+
 struct pt_shadow {
     /*
      * A frame used to shadow a vcpu's intended pagetable.  When shadowing,
@@ -31,6 +55,17 @@ struct pt_shadow {
      */
     paddr_t shadow_l4;
     l4_pgentry_t *shadow_l4_va;
+
+    /*
+     * Domain to which the shadowed state belongs, or NULL if no state is
+     * being cached.  IPIs for updates to cached information are based on the
+     * domain dirty mask, which can race with the target of the IPI switching
+     * to a different context.
+     */
+    const struct domain *domain;
+
+    /* If nonzero, a guest's pagetable which we are shadowing. */
+    paddr_t shadowing;
 };
 
 static DEFINE_PER_CPU(struct pt_shadow, ptsh);
@@ -76,6 +111,135 @@ void pt_shadow_free(unsigned int cpu)
 }
 
 /*
+ * We only need to shadow 4-level PV guests.  All other guests have per-vcpu
+ * monitor tables which are never scheduled on concurrent pcpus.  Care needs
+ * to be taken not to shadow d0v0 during construction, as it writes its L4
+ * directly.
+ */
+static bool pt_need_shadow(const struct domain *d)
+{
+    return (system_state >= SYS_STATE_active && is_pv_domain(d) &&
+            !is_idle_domain(d) && !is_pv_32bit_domain(d) && d->max_vcpus > 1);
+}
+
+unsigned long pt_maybe_shadow(struct vcpu *v)
+{
+    unsigned int cpu = smp_processor_id();
+    struct pt_shadow *ptsh = &per_cpu(ptsh, cpu);
+    unsigned long flags, new_cr3 = v->arch.cr3;
+
+    /*
+     * IPIs for updates are based on the domain dirty mask.  If we ever switch
+     * out of the currently shadowed context (even to idle), the cache will
+     * become stale.
+     */
+    if ( ptsh->domain &&
+         ptsh->domain != v->domain )
+    {
+        ptsh->domain = NULL;
+        ptsh->shadowing = 0;
+    }
+
+    /* No shadowing necessary?  Run on the intended pagetable. */
+    if ( !pt_need_shadow(v->domain) )
+        return new_cr3;
+
+    ptsh->domain = v->domain;
+
+    /* Fastpath, if we are already shadowing the intended pagetable. */
+    if ( ptsh->shadowing == new_cr3 )
+        return ptsh->shadow_l4;
+
+    /*
+     * We may be called with interrupts disabled (e.g. context switch), or
+     * interrupts enabled (e.g. new_guest_cr3()).
+     *
+     * Reads and modifications of ptsh-> are only on the local cpu, but must
+     * be excluded against reads and modifications in _pt_shadow_ipi().
+     */
+    local_irq_save(flags);
+
+    {
+        l4_pgentry_t *l4t, *vcpu_l4t;
+
+        set_percpu_fixmap(cpu, PERCPU_FIXSLOT_SHADOW,
+                          l1e_from_paddr(new_cr3, __PAGE_HYPERVISOR_RO));
+        ptsh->shadowing = new_cr3;
+        local_irq_restore(flags);
+
+        l4t = ptsh->shadow_l4_va;
+        vcpu_l4t = percpu_fix_to_virt(cpu, PERCPU_FIXSLOT_SHADOW);
+
+        copy_page(l4t, vcpu_l4t);
+    }
+
+    return ptsh->shadow_l4;
+}
+
+struct ptsh_ipi_info
+{
+    const struct domain *d;
+    const struct page_info *pg;
+    enum {
+        PTSH_IPI_WRITE,
+    } op;
+    unsigned int slot;
+};
+
+static void _pt_shadow_ipi(void *arg)
+{
+    unsigned int cpu = smp_processor_id();
+    struct pt_shadow *ptsh = &per_cpu(ptsh, cpu);
+    const struct ptsh_ipi_info *info = arg;
+    unsigned long maddr = page_to_maddr(info->pg);
+
+    /* No longer shadowing state from this domain?  Nothing to do. */
+    if ( info->d != ptsh->domain )
+        return;
+
+    /* Not shadowing this frame?  Nothing to do. */
+    if ( ptsh->shadowing != maddr )
+        return;
+
+    switch ( info->op )
+    {
+        l4_pgentry_t *l4t, *vcpu_l4t;
+
+    case PTSH_IPI_WRITE:
+        l4t = ptsh->shadow_l4_va;
+
+        /* Reuse the mapping established in pt_maybe_shadow(). */
+        ASSERT(l1e_get_paddr(*percpu_fixmap_l1e(cpu, PERCPU_FIXSLOT_SHADOW)) ==
+               maddr);
+        vcpu_l4t = percpu_fix_to_virt(cpu, PERCPU_FIXSLOT_SHADOW);
+
+        l4t[info->slot] = vcpu_l4t[info->slot];
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
+}
+
+void pt_shadow_l4_write(const struct domain *d, const struct page_info *pg,
+                        unsigned int slot)
+{
+    struct ptsh_ipi_info info;
+
+    if ( !pt_need_shadow(d) )
+        return;
+
+    info = (struct ptsh_ipi_info){
+        .d = d,
+        .pg = pg,
+        .op = PTSH_IPI_WRITE,
+        .slot = slot,
+    };
+
+    on_selected_cpus(d->domain_dirty_cpumask, _pt_shadow_ipi, &info, 1);
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h
index d46939a..748219f 100644
--- a/xen/include/asm-x86/fixmap.h
+++ b/xen/include/asm-x86/fixmap.h
@@ -28,6 +28,7 @@
 #include <acpi/apei.h>
 
 #define NR_PERCPU_SLOTS 1
+#define PERCPU_FIXSLOT_SHADOW 0
 
 /*
  * Here we define all the compile-time 'special' virtual
diff --git a/xen/include/asm-x86/pv/pt-shadow.h b/xen/include/asm-x86/pv/pt-shadow.h
index ff99c85..6e71e99 100644
--- a/xen/include/asm-x86/pv/pt-shadow.h
+++ b/xen/include/asm-x86/pv/pt-shadow.h
@@ -21,6 +21,8 @@
 #ifndef __X86_PV_PT_SHADOW_H__
 #define __X86_PV_PT_SHADOW_H__
 
+#include <xen/sched.h>
+
 #ifdef CONFIG_PV
 
 /*
@@ -30,11 +32,33 @@
 int pt_shadow_alloc(unsigned int cpu);
 void pt_shadow_free(unsigned int cpu);
 
+/*
+ * Called for context switches, and when a vcpu explicitly changes cr3.  The
+ * PT shadow logic returns the cr3 hardware should run on, which is either
+ * v->arch.cr3 (no shadowing necessary), or a local frame (which is a suitable
+ * shadow of v->arch.cr3).
+ */
+unsigned long pt_maybe_shadow(struct vcpu *v);
+
+/*
+ * Called when a write occurs to an L4 pagetable.  The PT shadow logic brings
+ * any shadows of this page up-to-date.
+ */
+void pt_shadow_l4_write(
+    const struct domain *d, const struct page_info *pg, unsigned int slot);
+
 #else /* !CONFIG_PV */
 
 static inline int pt_shadow_alloc(unsigned int cpu) { return 0; }
 static inline void pt_shadow_free(unsigned int cpu) { }
 
+static inline unsigned long pt_maybe_shadow(struct vcpu *v)
+{
+    return v->arch.cr3;
+}
+static inline void pt_shadow_l4_write(
+    const struct domain *d, const struct page_info *pg, unsigned int slot) { }
+
 #endif /* CONFIG_PV */
 
 #endif /* __X86_PV_PT_SHADOW_H__ */
-- 
2.1.4
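
For readers skimming the diff, here is a minimal, self-contained user-space
sketch of the bookkeeping described in the pt-shadow.c comment block: copy the
guest's L4 into a per-pcpu frame when it is loaded, and push individual slot
writes to every pcpu currently shadowing that frame.  All names here
(pcpu_shadow, shadow_switch, shadow_l4_write, NR_PCPUS) are illustrative only
and not part of the patch; the real code uses the percpu fixmap and
on_selected_cpus() as shown above, and the "IPI" below is just a loop.

/*
 * Illustrative model of the L4 shadowing scheme; hypothetical names, not Xen
 * code.  Build with: gcc -std=c11 -Wall sketch.c
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NR_PCPUS     4
#define L4_ENTRIES 512

typedef uint64_t l4e_t;

/* Per-pcpu shadow state, loosely mirroring struct pt_shadow in the patch. */
struct pcpu_shadow {
    const l4e_t *shadowing;      /* guest L4 currently shadowed, or NULL */
    l4e_t shadow[L4_ENTRIES];    /* the percpu copy hardware would run on */
};

static struct pcpu_shadow shadows[NR_PCPUS];

/* Context switch: (re)copy the guest's L4 into this pcpu's shadow frame. */
static l4e_t *shadow_switch(unsigned int cpu, const l4e_t *guest_l4)
{
    struct pcpu_shadow *ptsh = &shadows[cpu];

    if ( ptsh->shadowing != guest_l4 )   /* fastpath if already shadowing */
    {
        memcpy(ptsh->shadow, guest_l4, sizeof(ptsh->shadow));  /* 4K copy */
        ptsh->shadowing = guest_l4;
    }

    return ptsh->shadow;                 /* what %cr3 would point at */
}

/* Guest L4 write: update the real table, then notify every pcpu. */
static void shadow_l4_write(l4e_t *guest_l4, unsigned int slot, l4e_t val)
{
    unsigned int cpu;

    guest_l4[slot] = val;

    /* Stand-in for on_selected_cpus(): each pcpu checks whether it cares. */
    for ( cpu = 0; cpu < NR_PCPUS; cpu++ )
        if ( shadows[cpu].shadowing == guest_l4 )
            shadows[cpu].shadow[slot] = val;
}

int main(void)
{
    static l4e_t guest_l4[L4_ENTRIES];

    /* Two pcpus run vcpus which share one guest L4. */
    l4e_t *s0 = shadow_switch(0, guest_l4);
    l4e_t *s1 = shadow_switch(1, guest_l4);

    shadow_l4_write(guest_l4, 42, 0xdeadbeef);

    printf("cpu0 slot42 %#lx, cpu1 slot42 %#lx\n",
           (unsigned long)s0[42], (unsigned long)s1[42]);

    return 0;
}

The slot-granular sync path exists because a guest L4 write only touches one
entry; forwarding a single frame/slot pair is much cheaper than redoing the
full 4K copy on every update, which is the cost the cover text warns about for
this initial, simplistic version of the algorithm.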