[Xen-devel] [PATCH 6 of 9] Protect superpage splitting in implementation-dependent traversals
 xen/arch/x86/mm/p2m-ept.c  |  15 +++++++-
 xen/arch/x86/mm/p2m-lock.h |  11 +++++-
 xen/arch/x86/mm/p2m-pt.c   |  82 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 84 insertions(+), 24 deletions(-)

In both pt and ept modes, the p2m trees can map 1GB superpages. Because our
locks work on a 2MB superpage basis, without proper locking we could have two
simultaneous traversals to two different 2MB ranges split the 1GB superpage in
a racy manner.

Fix this with the existing alloc_lock in the superpage structure. We allow
1GB-grained locking for a future implementation -- we just default to a global
lock in all cases currently.

Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r a23e1262b124 -r 0a97d62c2d41 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -163,6 +163,7 @@ static int ept_set_middle_entry(struct p
 }
 
 /* free ept sub tree behind an entry */
+/* Lock on this superpage (if any) held on entry */
 static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
 {
     /* End if the entry is a leaf entry. */
@@ -181,6 +182,7 @@ static void ept_free_entry(struct p2m_do
     p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
 }
 
+/* Lock on this superpage held on entry */
 static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry,
                                 int level, int target)
 {
@@ -315,6 +317,7 @@ ept_set_entry(struct p2m_domain *p2m, un
     int needs_sync = 1;
     struct domain *d = p2m->domain;
     ept_entry_t old_entry = { .epte = 0 };
+    p2m_lock_t *p2ml = p2m->lock;
 
     /*
      * the caller must make sure:
@@ -361,6 +364,8 @@ ept_set_entry(struct p2m_domain *p2m, un
      * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately.
      */
 
+    if ( target == 2 )
+        lock_p2m_1G(p2ml, index);
     if ( i == target )
     {
         /* We reached the target level. */
@@ -373,7 +378,7 @@ ept_set_entry(struct p2m_domain *p2m, un
         /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
          * the intermediate tables will be freed below after the ept flush
          *
-         * Read-then-write is OK because we hold the p2m lock. */
+         * Read-then-write is OK because we hold the 1G or 2M lock. */
         old_entry = *ept_entry;
 
         if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
@@ -412,6 +417,8 @@ ept_set_entry(struct p2m_domain *p2m, un
         if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) )
         {
             ept_free_entry(p2m, &split_ept_entry, i);
+            if ( target == 2 )
+                unlock_p2m_1G(p2ml, index);
             goto out;
         }
 
@@ -440,7 +447,7 @@ ept_set_entry(struct p2m_domain *p2m, un
         /* the caller should take care of the previous page */
         new_entry.mfn = mfn_x(mfn);
 
-        /* Safe to read-then-write because we hold the p2m lock */
+        /* Safe to read-then-write because we hold the 1G or 2M lock */
         if ( ept_entry->mfn == new_entry.mfn )
             need_modify_vtd_table = 0;
 
@@ -448,6 +455,8 @@ ept_set_entry(struct p2m_domain *p2m, un
         atomic_write_ept_entry(ept_entry, new_entry);
     }
 
+    if ( target == 2 )
+        unlock_p2m_1G(p2ml, index);
     /* Track the highest gfn for which we have ever had a valid mapping */
     if ( mfn_valid(mfn_x(mfn)) &&
@@ -642,6 +651,8 @@ static ept_entry_t ept_get_entry_content
     return content;
 }
 
+/* This is called before crashing a domain, so we're not particularly
+ * concerned with locking */
 void ept_walk_table(struct domain *d, unsigned long gfn)
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
diff -r a23e1262b124 -r 0a97d62c2d41 xen/arch/x86/mm/p2m-lock.h
--- a/xen/arch/x86/mm/p2m-lock.h
+++ b/xen/arch/x86/mm/p2m-lock.h
@@ -109,6 +109,11 @@ typedef struct __p2m_lock {
     mm_lock_t lock;
 } p2m_lock_t;
 
+/* We do not need sub-locking on 1G superpages because we have a
+ * global lock */
+#define lock_p2m_1G(l, gfn)     ((void)l)
+#define unlock_p2m_1G(l, gfn)   ((void)l)
+
 static inline int p2m_lock_init(struct p2m_domain *p2m)
 {
     p2m_lock_t *p2ml = xmalloc(p2m_lock_t);
@@ -271,7 +276,8 @@ typedef struct __p2m_lock
     /* To enforce ordering in mm-locks */
     int unlock_level;
     /* To protect on-demand allocation of locks
-     * (yeah you heard that right) */
+     * (yeah you heard that right)
+     * Also protects 1GB superpage splitting. */
     spinlock_t alloc_lock;
     /* Global lock */
     p2m_inner_lock_t global;
@@ -429,6 +435,9 @@ static inline void put_p2m_2m(struct p2m
     unlock_p2m_leaf(__get_2m_lock(p2m->lock, gfn_1g, gfn_2m));
 }
 
+#define lock_p2m_1G(l, gfn)     spin_lock(&l->alloc_lock)
+#define unlock_p2m_1G(l, gfn)   spin_unlock(&l->alloc_lock)
+
 /* Allocate 2M locks we may not have allocated yet for this 1G superpage */
 static inline int alloc_locks_2m(struct p2m_domain *p2m, unsigned long gfn_1g)
 {
diff -r a23e1262b124 -r 0a97d62c2d41 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -159,6 +159,7 @@ p2m_next_level(struct p2m_domain *p2m, m
                unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
                u32 max, unsigned long type)
 {
+    p2m_lock_t *p2ml = p2m->lock;
     l1_pgentry_t *l1_entry;
     l1_pgentry_t *p2m_entry;
     l1_pgentry_t new_entry;
@@ -207,6 +208,7 @@ p2m_next_level(struct p2m_domain *p2m, m
     ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
 
     /* split 1GB pages into 2MB pages */
+    lock_p2m_1G(p2ml, *gfn_remainder >> shift);
     if ( type == PGT_l2_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
     {
         unsigned long flags, pfn;
@@ -214,7 +216,10 @@ p2m_next_level(struct p2m_domain *p2m, m
 
         pg = p2m_alloc_ptp(p2m, PGT_l2_page_table);
         if ( pg == NULL )
+        {
+            unlock_p2m_1G(p2ml, *gfn_remainder >> shift);
             return 0;
+        }
 
         flags = l1e_get_flags(*p2m_entry);
         pfn = l1e_get_pfn(*p2m_entry);
@@ -233,9 +238,11 @@ p2m_next_level(struct p2m_domain *p2m, m
         p2m_add_iommu_flags(&new_entry, 2, IOMMUF_readable|IOMMUF_writable);
         p2m->write_p2m_entry(p2m, gfn, p2m_entry, *table_mfn, new_entry, 3);
     }
-
+    unlock_p2m_1G(p2ml, *gfn_remainder >> shift);
 
     /* split single 2MB large page into 4KB page in P2M table */
+    /* This does not necessitate locking because 2MB regions are locked
+     * exclusively */
     if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
     {
         unsigned long flags, pfn;
@@ -297,6 +304,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
                                    IOMMUF_readable|IOMMUF_writable:
                                    0;
     unsigned long old_mfn = 0;
+    p2m_lock_t *p2ml = p2m->lock;
 
     if ( tb_init_done )
     {
@@ -326,7 +334,10 @@ p2m_set_entry(struct p2m_domain *p2m, un
      */
     if ( page_order == PAGE_ORDER_1G )
     {
-        l1_pgentry_t old_entry = l1e_empty();
+        l1_pgentry_t old_entry;
+        lock_p2m_1G(p2ml, l3_table_offset(gfn));
+
+        old_entry = l1e_empty();
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                    L3_PAGETABLE_ENTRIES);
@@ -358,7 +369,9 @@ p2m_set_entry(struct p2m_domain *p2m, un
         /* Free old intermediate tables if necessary */
         if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
             p2m_free_entry(p2m, &old_entry, page_order);
+        unlock_p2m_1G(p2ml, l3_table_offset(gfn));
     }
+
     /*
      * When using PAE Xen, we only allow 33 bits of pseudo-physical
      * address in translated guests (i.e. 8 GBytes).  This restriction
@@ -515,6 +528,7 @@ static mfn_t p2m_gfn_to_mfn_current(stru
      * XXX Once we start explicitly registering MMIO regions in the p2m
      * XXX we will return p2m_invalid for unmapped gfns */
 
+    p2m_lock_t *p2ml = p2m->lock;
     l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
     l2_pgentry_t l2e = l2e_empty();
     int ret;
@@ -543,6 +557,8 @@ pod_retry_l3:
             /* The read has succeeded, so we know that mapping exists */
             if ( q != p2m_query )
             {
+                /* We do not need to lock the 1G superpage here because PoD
+                 * will do it by splitting */
                 if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) )
                     goto pod_retry_l3;
                 p2mt = p2m_invalid;
@@ -558,6 +574,7 @@ pod_retry_l3:
             goto pod_retry_l2;
         }
 
+        lock_p2m_1G(p2ml, l3_table_offset(addr));
         if ( l3e_get_flags(l3e) & _PAGE_PSE )
         {
             p2mt = p2m_flags_to_type(l3e_get_flags(l3e));
@@ -571,8 +588,12 @@ pod_retry_l3:
 
             if ( page_order )
                 *page_order = PAGE_ORDER_1G;
+            unlock_p2m_1G(p2ml, l3_table_offset(addr));
             goto out;
         }
+        unlock_p2m_1G(p2ml, l3_table_offset(addr));
+#else
+    (void)p2ml; /* gcc ... */
 #endif
     /*
      * Read & process L2
@@ -691,6 +712,7 @@ p2m_gfn_to_mfn(struct p2m_domain *p2m, u
     paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
     l2_pgentry_t *l2e;
     l1_pgentry_t *l1e;
+    p2m_lock_t *p2ml = p2m->lock;
 
     ASSERT(paging_mode_translate(p2m->domain));
 
@@ -744,6 +766,8 @@ pod_retry_l3:
         {
             if ( q != p2m_query )
             {
+                /* See previous comments on why there is no need to lock
+                 * 1GB superpage here */
                 if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_1G, q) )
                     goto pod_retry_l3;
             }
@@ -755,16 +779,23 @@ pod_retry_l3:
     }
     else if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
     {
-        mfn = _mfn(l3e_get_pfn(*l3e) +
-                   l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
-                   l1_table_offset(addr));
-        *t = p2m_flags_to_type(l3e_get_flags(*l3e));
-        unmap_domain_page(l3e);
+        lock_p2m_1G(p2ml, l3_table_offset(addr));
+        /* Retry to be sure */
+        if ( (l3e_get_flags(*l3e) & _PAGE_PSE) )
+        {
+            mfn = _mfn(l3e_get_pfn(*l3e) +
+                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
+                       l1_table_offset(addr));
+            *t = p2m_flags_to_type(l3e_get_flags(*l3e));
+            unmap_domain_page(l3e);
 
-        ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
-        if ( page_order )
-            *page_order = PAGE_ORDER_1G;
-        return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+            if ( page_order )
+                *page_order = PAGE_ORDER_1G;
+            unlock_p2m_1G(p2ml, l3_table_offset(addr));
+            return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+        }
+        unlock_p2m_1G(p2ml, l3_table_offset(addr));
     }
 
     mfn = _mfn(l3e_get_pfn(*l3e));
@@ -852,6 +883,7 @@ static void p2m_change_type_global(struc
     l4_pgentry_t *l4e;
     unsigned long i4;
 #endif /* CONFIG_PAGING_LEVELS == 4 */
+    p2m_lock_t *p2ml = p2m->lock;
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
     BUG_ON(ot != nt && (ot == p2m_mmio_direct || nt == p2m_mmio_direct));
@@ -891,17 +923,25 @@ static void p2m_change_type_global(struc
             }
             if ( (l3e_get_flags(l3e[i3]) & _PAGE_PSE) )
            {
-                flags = l3e_get_flags(l3e[i3]);
-                if ( p2m_flags_to_type(flags) != ot )
+                lock_p2m_1G(p2ml, i3);
+                if ( (l3e_get_flags(l3e[i3]) & _PAGE_PSE) )
+                {
+                    flags = l3e_get_flags(l3e[i3]);
+                    if ( p2m_flags_to_type(flags) != ot )
+                    {
+                        unlock_p2m_1G(p2ml, i3);
+                        continue;
+                    }
+                    mfn = l3e_get_pfn(l3e[i3]);
+                    gfn = get_gpfn_from_mfn(mfn);
+                    flags = p2m_type_to_flags(nt, _mfn(mfn));
+                    l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+                    p2m->write_p2m_entry(p2m, gfn,
+                                         (l1_pgentry_t *)&l3e[i3],
+                                         l3mfn, l1e_content, 3);
+                    unlock_p2m_1G(p2ml, i3);
                     continue;
-                mfn = l3e_get_pfn(l3e[i3]);
-                gfn = get_gpfn_from_mfn(mfn);
-                flags = p2m_type_to_flags(nt, _mfn(mfn));
-                l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
-                p2m->write_p2m_entry(p2m, gfn,
-                                     (l1_pgentry_t *)&l3e[i3],
-                                     l3mfn, l1e_content, 3);
-                continue;
+                }
             }
 
             l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
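The pattern the patch applies around 1GB splits, as described above, is:
check, take the 1G lock (currently the global alloc_lock), re-check under the
lock, then split. Below is a minimal stand-alone sketch of that
check-lock-recheck idea using plain pthreads rather than Xen's p2m lock
types; every name in it (gb_entry, split_1g_superpage, walker) is
hypothetical and it is not code from this patch.

/* Illustrative only -- not Xen code.  Build with: gcc -pthread sketch.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* A stand-in for a 1GB p2m entry that may still be a superpage. */
struct gb_entry {
    bool is_superpage;   /* true while the 1GB mapping is unsplit */
    void **l2_table;     /* installed once split into 512 2MB entries */
};

/* Stands in for the alloc_lock the patch reuses as the 1G lock. */
static pthread_mutex_t lock_1g = PTHREAD_MUTEX_INITIALIZER;
static struct gb_entry entry = { .is_superpage = true, .l2_table = NULL };

/* Split the 1GB superpage into 512 x 2MB entries (grossly simplified). */
static void split_1g_superpage(struct gb_entry *e)
{
    e->l2_table = calloc(512, sizeof(void *));
    e->is_superpage = false;
}

/* Each walker wants a different 2MB range under the same 1GB entry. */
static void *walker(void *arg)
{
    (void)arg;
    if ( entry.is_superpage )            /* unlocked fast-path check */
    {
        pthread_mutex_lock(&lock_1g);    /* lock_p2m_1G() in the patch */
        if ( entry.is_superpage )        /* re-check: the other walker may
                                          * have split it meanwhile */
            split_1g_superpage(&entry);
        pthread_mutex_unlock(&lock_1g);  /* unlock_p2m_1G() */
    }
    /* ... continue the traversal into the now 2MB-granular table ... */
    return NULL;
}

int main(void)
{
    pthread_t t1, t2;
    pthread_create(&t1, NULL, walker, NULL);
    pthread_create(&t2, NULL, walker, NULL);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    /* Without the lock, both walkers could pass the first check and each
     * install its own l2_table, losing one of them. */
    printf("split exactly once: l2_table=%p\n", (void *)entry.l2_table);
    return 0;
}

In the patch itself the defines in p2m-lock.h make lock_p2m_1G a no-op in the
global-lock configuration (the global lock already serializes the split) and
a spin_lock on alloc_lock in the fine-grained one, which is where the
re-check after taking the lock does the work.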