[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen stable-4.8] x86/mm: Separate out partial_pte tristate into individual flags
commit bf78103ffe9e3f4ce781a218df041b4d929962ca Author: George Dunlap <george.dunlap@xxxxxxxxxx> AuthorDate: Mon Nov 4 15:24:34 2019 +0100 Commit: Jan Beulich <jbeulich@xxxxxxxx> CommitDate: Mon Nov 4 15:24:34 2019 +0100 x86/mm: Separate out partial_pte tristate into individual flags At the moment, partial_pte is a tri-state that contains two distinct bits of information: 1. If zero, the pte at index [nr_validated_ptes] is un-validated. If non-zero, the pte was last seen with PGT_partial set. 2. If positive, the pte at index [nr_validated_ptes] does not hold a general reference count. If negative, it does. To make future patches more clear, separate out this functionality into two distinct, named bits: PTF_partial_set (for #1) and PTF_partial_general_ref (for #2). Additionally, a number of functions which need this information also take other flags to control behavior (such as `preemptible` and `defer`). These are hard to read in the caller (since you only see 'true' or 'false'), and ugly when many are added together. In preparation for adding yet another flag in a future patch, collapse all of these into a single `flag` variable. NB that this does mean checking for what was previously the '-1' condition a bit more ugly in the put_page_from_lNe functions (since you have to check for both partial_set and general ref); but this clause will go away in a future patch. Also note that the original comment had an off-by-one error: partial_flags (like partial_pte before it) concerns plNe[nr_validated_ptes], not plNe[nr_validated_ptes+1]. No functional change intended. This is part of XSA-299. Reported-by: George Dunlap <george.dunlap@xxxxxxxxxx> Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxx> Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx> master commit: 1b6fa638d21006d3c0a3038132c6cb326d8bba08 master date: 2019-10-31 16:12:14 +0100 --- xen/arch/x86/mm.c | 166 ++++++++++++++++++++++++++++------------------- xen/include/asm-x86/mm.h | 41 ++++++++---- 2 files changed, 128 insertions(+), 79 deletions(-) diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 1c19092503..680b5b3de7 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -756,22 +756,35 @@ static int get_page_from_pagenr(unsigned long page_nr, struct domain *d) static int __get_page_type(struct page_info *page, unsigned long type, int preemptible); +/* + * The following flags are used to specify behavior of various get and + * put commands. The first two are also stored in page->partial_flags + * to indicate the state of the page pointed to by + * page->pte[page->nr_validated_entries]. See the comment in mm.h for + * more information. + */ +#define PTF_partial_set (1 << 0) +#define PTF_partial_general_ref (1 << 1) +#define PTF_preemptible (1 << 2) +#define PTF_defer (1 << 3) + static int get_page_and_type_from_pagenr(unsigned long page_nr, unsigned long type, struct domain *d, - int partial, - int preemptible) + unsigned int flags) { struct page_info *page = mfn_to_page(page_nr); int rc; + bool preemptible = flags & PTF_preemptible, + partial_ref = flags & PTF_partial_general_ref; - if ( likely(partial >= 0) && + if ( likely(!partial_ref) && unlikely(!get_page_from_pagenr(page_nr, d)) ) return -EINVAL; rc = __get_page_type(page, type, preemptible); - if ( unlikely(rc) && partial >= 0 && + if ( unlikely(rc) && !partial_ref && (!preemptible || page != current->arch.old_guest_table) ) put_page(page); @@ -1230,7 +1243,7 @@ get_page_from_l1e( define_get_linear_pagetable(l2); static int get_page_from_l2e( - l2_pgentry_t l2e, unsigned long pfn, struct domain *d, int partial) + l2_pgentry_t l2e, unsigned long pfn, struct domain *d, unsigned int flags) { unsigned long mfn = l2e_get_pfn(l2e); int rc; @@ -1246,8 +1259,9 @@ get_page_from_l2e( if ( !(l2e_get_flags(l2e) & _PAGE_PSE) ) { - rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, - partial, false); + ASSERT(!(flags & PTF_preemptible)); + + rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, flags); if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) rc = 0; return rc; @@ -1273,7 +1287,7 @@ get_page_from_l2e( define_get_linear_pagetable(l3); static int get_page_from_l3e( - l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial) + l3_pgentry_t l3e, unsigned long pfn, struct domain *d, unsigned int flags) { int rc; @@ -1287,7 +1301,7 @@ get_page_from_l3e( } rc = get_page_and_type_from_pagenr( - l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, 1); + l3e_get_pfn(l3e), PGT_l2_page_table, d, flags | PTF_preemptible); if ( unlikely(rc == -EINVAL) && !is_pv_32bit_domain(d) && get_l3_linear_pagetable(l3e, pfn, d) ) @@ -1299,7 +1313,7 @@ get_page_from_l3e( define_get_linear_pagetable(l4); static int get_page_from_l4e( - l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial) + l4_pgentry_t l4e, unsigned long pfn, struct domain *d, unsigned int flags) { int rc; @@ -1313,7 +1327,7 @@ get_page_from_l4e( } rc = get_page_and_type_from_pagenr( - l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, 1); + l4e_get_pfn(l4e), PGT_l3_page_table, d, flags | PTF_preemptible); if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) ) rc = 0; @@ -1443,7 +1457,7 @@ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner) * Note also that this automatically deals correctly with linear p.t.'s. */ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, - int partial, bool defer) + unsigned int flags) { int rc = 0; @@ -1457,12 +1471,13 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, struct page_info *pg = l2e_get_page(l2e); struct page_info *ptpg = mfn_to_page(pfn); - if ( unlikely(partial > 0) ) + if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) == + PTF_partial_set ) { - ASSERT(!defer); + ASSERT(!(flags & PTF_defer)); rc = _put_page_type(pg, true, ptpg); } - else if ( defer ) + else if ( flags & PTF_defer ) { current->arch.old_guest_ptpg = ptpg; current->arch.old_guest_table = pg; @@ -1479,7 +1494,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, } static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - int partial, bool_t defer) + unsigned int flags) { struct page_info *pg; int rc; @@ -1502,13 +1517,14 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, pg = l3e_get_page(l3e); - if ( unlikely(partial > 0) ) + if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) == + PTF_partial_set ) { - ASSERT(!defer); + ASSERT(!(flags & PTF_defer)); return _put_page_type(pg, true, mfn_to_page(pfn)); } - if ( defer ) + if ( flags & PTF_defer ) { current->arch.old_guest_ptpg = mfn_to_page(pfn); current->arch.old_guest_table = pg; @@ -1523,7 +1539,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, } static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - int partial, bool_t defer) + unsigned int flags) { int rc = 1; @@ -1532,13 +1548,14 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, { struct page_info *pg = l4e_get_page(l4e); - if ( unlikely(partial > 0) ) + if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) == + PTF_partial_set ) { - ASSERT(!defer); + ASSERT(!(flags & PTF_defer)); return _put_page_type(pg, true, mfn_to_page(pfn)); } - if ( defer ) + if ( flags & PTF_defer ) { current->arch.old_guest_ptpg = mfn_to_page(pfn); current->arch.old_guest_table = pg; @@ -1648,12 +1665,13 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) unsigned long pfn = page_to_mfn(page); l2_pgentry_t *pl2e; unsigned int i; - int rc = 0, partial = page->partial_pte; + int rc = 0; + unsigned int partial_flags = page->partial_flags; pl2e = map_domain_page(_mfn(pfn)); for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; - i++, partial = 0 ) + i++, partial_flags = 0 ) { if ( i > page->nr_validated_ptes && hypercall_preempt_check() ) { @@ -1663,18 +1681,19 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) } if ( !is_guest_l2_slot(d, type, i) || - (rc = get_page_from_l2e(pl2e[i], pfn, d, partial)) > 0 ) + (rc = get_page_from_l2e(pl2e[i], pfn, d, partial_flags)) > 0 ) continue; if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: 1; + /* Set 'set', retain 'general ref' */ + page->partial_flags = partial_flags | PTF_partial_set; } else if ( rc == -EINTR && i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } else if ( rc < 0 && rc != -EINTR ) @@ -1683,7 +1702,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) if ( i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; current->arch.old_guest_ptpg = NULL; current->arch.old_guest_table = page; } @@ -1713,7 +1732,8 @@ static int alloc_l3_table(struct page_info *page) unsigned long pfn = page_to_mfn(page); l3_pgentry_t *pl3e; unsigned int i; - int rc = 0, partial = page->partial_pte; + int rc = 0; + unsigned int partial_flags = page->partial_flags; pl3e = map_domain_page(_mfn(pfn)); @@ -1728,7 +1748,7 @@ static int alloc_l3_table(struct page_info *page) memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e)); for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; - i++, partial = 0 ) + i++, partial_flags = 0 ) { if ( i > page->nr_validated_ptes && hypercall_preempt_check() ) { @@ -1746,21 +1766,23 @@ static int alloc_l3_table(struct page_info *page) rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), PGT_l2_page_table | PGT_pae_xen_l2, - d, partial, 1); + d, + partial_flags | PTF_preemptible); } else if ( !is_guest_l3_slot(i) || - (rc = get_page_from_l3e(pl3e[i], pfn, d, partial)) > 0 ) + (rc = get_page_from_l3e(pl3e[i], pfn, d, partial_flags)) > 0 ) continue; if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: 1; + /* Set 'set', leave 'general ref' set if this entry was set */ + page->partial_flags = partial_flags | PTF_partial_set; } else if ( rc == -EINTR && i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } if ( rc < 0 ) @@ -1777,7 +1799,7 @@ static int alloc_l3_table(struct page_info *page) if ( i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; current->arch.old_guest_ptpg = NULL; current->arch.old_guest_table = page; } @@ -1843,19 +1865,21 @@ static int alloc_l4_table(struct page_info *page) unsigned long pfn = page_to_mfn(page); l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn)); unsigned int i; - int rc = 0, partial = page->partial_pte; + int rc = 0; + unsigned int partial_flags = page->partial_flags; for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; - i++, partial = 0 ) + i++, partial_flags = 0 ) { if ( !is_guest_l4_slot(d, i) || - (rc = get_page_from_l4e(pl4e[i], pfn, d, partial)) > 0 ) + (rc = get_page_from_l4e(pl4e[i], pfn, d, partial_flags)) > 0 ) continue; if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: 1; + /* Set 'set', leave 'general ref' set if this entry was set */ + page->partial_flags = partial_flags | PTF_partial_set; } else if ( rc < 0 ) { @@ -1864,7 +1888,7 @@ static int alloc_l4_table(struct page_info *page) if ( i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; if ( rc == -EINTR ) rc = -ERESTART; else @@ -1918,19 +1942,20 @@ static int free_l2_table(struct page_info *page) struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); l2_pgentry_t *pl2e; - int rc = 0, partial = page->partial_pte; - unsigned int i = page->nr_validated_ptes - !partial; + int rc = 0; + unsigned int partial_flags = page->partial_flags, + i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set); pl2e = map_domain_page(_mfn(pfn)); for ( ; ; ) { if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) ) - rc = put_page_from_l2e(pl2e[i], pfn, partial, false); + rc = put_page_from_l2e(pl2e[i], pfn, partial_flags); if ( rc < 0 ) break; - partial = 0; + partial_flags = 0; if ( !i-- ) break; @@ -1952,12 +1977,14 @@ static int free_l2_table(struct page_info *page) else if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: -1; + page->partial_flags = (partial_flags & PTF_partial_set) ? + partial_flags : + (PTF_partial_set | PTF_partial_general_ref); } else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 ) { page->nr_validated_ptes = i + 1; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } @@ -1969,8 +1996,9 @@ static int free_l3_table(struct page_info *page) struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); l3_pgentry_t *pl3e; - int rc = 0, partial = page->partial_pte; - unsigned int i = page->nr_validated_ptes - !partial; + int rc = 0; + unsigned int partial_flags = page->partial_flags, + i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set); pl3e = map_domain_page(_mfn(pfn)); @@ -1978,11 +2006,11 @@ static int free_l3_table(struct page_info *page) { if ( is_guest_l3_slot(i) ) { - rc = put_page_from_l3e(pl3e[i], pfn, partial, 0); + rc = put_page_from_l3e(pl3e[i], pfn, partial_flags); if ( rc < 0 ) break; - partial = 0; + partial_flags = 0; if ( rc == 0 ) unadjust_guest_l3e(pl3e[i], d); } @@ -2002,12 +2030,14 @@ static int free_l3_table(struct page_info *page) if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: -1; + page->partial_flags = (partial_flags & PTF_partial_set) ? + partial_flags : + (PTF_partial_set | PTF_partial_general_ref); } else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 ) { page->nr_validated_ptes = i + 1; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } return rc > 0 ? 0 : rc; @@ -2018,26 +2048,29 @@ static int free_l4_table(struct page_info *page) struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn)); - int rc = 0, partial = page->partial_pte; - unsigned int i = page->nr_validated_ptes - !partial; + int rc = 0; + unsigned partial_flags = page->partial_flags, + i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set); do { if ( is_guest_l4_slot(d, i) ) - rc = put_page_from_l4e(pl4e[i], pfn, partial, 0); + rc = put_page_from_l4e(pl4e[i], pfn, partial_flags); if ( rc < 0 ) break; - partial = 0; + partial_flags = 0; } while ( i-- ); if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: -1; + page->partial_flags = (partial_flags & PTF_partial_set) ? + partial_flags : + (PTF_partial_set | PTF_partial_general_ref); } else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 ) { page->nr_validated_ptes = i + 1; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } @@ -2315,7 +2348,7 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, return -EBUSY; } - put_page_from_l2e(ol2e, pfn, 0, true); + put_page_from_l2e(ol2e, pfn, PTF_defer); return rc; } @@ -2389,7 +2422,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, if ( !create_pae_xen_mappings(d, pl3e) ) BUG(); - put_page_from_l3e(ol3e, pfn, 0, 1); + put_page_from_l3e(ol3e, pfn, PTF_defer); return rc; } @@ -2451,7 +2484,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, return -EFAULT; } - put_page_from_l4e(ol4e, pfn, 0, 1); + put_page_from_l4e(ol4e, pfn, PTF_defer); return rc; } @@ -2715,7 +2748,7 @@ int free_page_type(struct page_info *page, unsigned long type, if ( !(type & PGT_partial) ) { page->nr_validated_ptes = 1U << PAGETABLE_ORDER; - page->partial_pte = 0; + page->partial_flags = 0; } switch ( type & PGT_type_mask ) @@ -3003,7 +3036,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, if ( !(x & PGT_partial) ) { page->nr_validated_ptes = 0; - page->partial_pte = 0; + page->partial_flags = 0; } page->linear_pt_count = 0; rc = alloc_page_type(page, type, preemptible); @@ -3369,7 +3402,8 @@ int new_guest_cr3(unsigned long mfn) rc = paging_mode_refcounts(d) ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL) - : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1); + : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, + PTF_preemptible); switch ( rc ) { case 0: @@ -3757,7 +3791,7 @@ long do_mmuext_op( rc = get_page_from_pagenr(op.arg1.mfn, d) ? 0 : -EINVAL; else rc = get_page_and_type_from_pagenr( - op.arg1.mfn, PGT_root_page_table, d, 0, 1); + op.arg1.mfn, PGT_root_page_table, d, PTF_preemptible); if ( unlikely(rc) ) { diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 955e1fa86a..8e3cef954c 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -141,19 +141,34 @@ struct page_info * setting the flag must not drop that reference, whereas the instance * clearing it will have to. * - * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has - * been partially validated. This implies that the general reference - * to the page (acquired from get_page_from_lNe()) would be dropped - * (again due to the apparent failure) and hence must be re-acquired - * when resuming the validation, but must not be dropped when picking - * up the page for invalidation. + * If partial_flags & PTF_partial_set is set, then the page at + * at @nr_validated_ptes had PGT_partial set as a result of an + * operation on the current page. (That page may or may not + * still have PGT_partial set.) * - * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has - * been partially invalidated. This is basically the opposite case of - * above, i.e. the general reference to the page was not dropped in - * put_page_from_lNe() (due to the apparent failure), and hence it - * must be dropped when the put operation is resumed (and completes), - * but it must not be acquired if picking up the page for validation. + * If PTF_partial_general_ref is set, then the PTE at + * @nr_validated_ptef holds a general reference count for the + * page. + * + * This happens: + * - During de-validation, if de-validation of the page was + * interrupted + * - During validation, if an invalid entry is encountered and + * validation is preemptible + * - During validation, if PTF_partial_general_ref was set on + * this entry to begin with (perhaps because we're picking + * up from a partial de-validation). + * + * When resuming validation, if PTF_partial_general_ref is clear, + * then a general reference must be re-acquired; if it is set, no + * reference should be acquired. + * + * When resuming de-validation, if PTF_partial_general_ref is + * clear, no reference should be dropped; if it is set, a + * reference should be dropped. + * + * NB that PTF_partial_set and PTF_partial_general_ref are + * defined in mm.c, the only place where they are used. * * The 3rd field, @linear_pt_count, indicates * - by a positive value, how many same-level page table entries a page @@ -164,7 +179,7 @@ struct page_info struct { u16 nr_validated_ptes:PAGETABLE_ORDER + 1; u16 :16 - PAGETABLE_ORDER - 1 - 2; - s16 partial_pte:2; + u16 partial_flags:2; s16 linear_pt_count; }; -- generated by git-patchbot for /home/xen/git/xen.git#stable-4.8 _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |