Re: [Xen-devel] open-coded page list manipulation in shadow code
Hi,
At 11:50 +0100 on 27 Jan (1422355826), Tim Deegan wrote:
> Let me have a think about it on Thursday (assuming I don't have too
> many patch series to review by then!)
OK, here's what I have. It keeps the mechanism the same, threading on
the main page list entry, but tidies it up to use the page_list operations
rather than open-coding. I've tested it (lightly - basically just
boot testing) with 32bit, 32pae and 64bit Windows VMs (all SMP), with
bigmem=y and bigmem=n.
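By way of illustration (this is a stand-alone toy, not Xen code; every name
in it is invented), here is what "threading on the main page list entry"
looks like when the links are held in the compact index (pdx) form, and the
kind of accessor, analogous to page_list_next(), that the series switches to
instead of open-coding the index-to-page conversion at each call site:

/* Toy model, not Xen code: each "page" carries a small index-based
 * link field, and NULL_IDX plays the role of PAGE_LIST_NULL.  The
 * point is the accessor: callers say toy_list_next(pg) rather than
 * open-coding idx_to_page(pg->list.next) everywhere, so the link
 * representation can change without touching the callers. */
#include <stdio.h>
#include <stdint.h>

#define NULL_IDX 0xffffu                    /* stand-in for PAGE_LIST_NULL */

struct toy_page {
    struct { uint16_t next, prev; } list;   /* compact index links */
};

static struct toy_page pages[8];            /* stand-in frame table */

static struct toy_page *idx_to_page(uint16_t idx)
{
    return idx == NULL_IDX ? NULL : &pages[idx];
}

/* One accessor that hides the link format from the callers. */
static struct toy_page *toy_list_next(const struct toy_page *pg)
{
    return idx_to_page(pg->list.next);
}

int main(void)
{
    struct toy_page *pg;

    /* Thread pages 1 -> 3 -> 5 together as one multi-page "shadow". */
    pages[1].list.next = 3;
    pages[3].list.next = 5;
    pages[5].list.next = NULL_IDX;

    for ( pg = &pages[1]; pg; pg = toy_list_next(pg) )
        printf("page %td\n", pg - pages);
    return 0;
}

The real code keeps two representations (compact indices or ordinary
pointer-based list entries) behind the same small set of accessors, which is
exactly why routing everything through them matters.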
It was developed on top of your bigmem series, but it should apply OK
before patch 3/4, removing the need to nobble shadow-paging in that
patch.
Cheers,
Tim.
From 57d211e47ef42012791252b4897231b392b7aff7 Mon Sep 17 00:00:00 2001
From: Tim Deegan <tim@xxxxxxx>
Date: Thu, 29 Jan 2015 17:09:58 +0000
Subject: [PATCH] x86/shadow: Tidy up fragmentary page lists in multi-page
shadows.
Multi-page shadows are linked together using the 'list' field. When
those shadows are in the pinned list, the list fragments are spliced
into the pinned list; otherwise they have no associated list head.
Rework the code that handles these fragments to use the page_list
interface rather than manipulating the fields directly. This makes
the code cleaner, and allows the 'list' field to be either the
compact pdx form or a normal list_entry.
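As an illustration only - a stand-alone toy, not Xen code, with every name
below invented - this is the shape the re-pinning path takes once it goes
through generic list operations: pull each page of a multi-page shadow out of
the pinned list, then re-add the fragment at the head, last page first so it
keeps its internal order.

/* Toy model, not Xen code: a sentinel-based doubly-linked list plus
 * del/add-at-head primitives, used the way the reworked sh_pin() uses
 * page_list_del()/page_list_add(). */
#include <stdio.h>

struct node { struct node *next, *prev; int id; };

static void list_init(struct node *h)
{
    h->next = h->prev = h;
}

static void list_del(struct node *n)
{
    n->prev->next = n->next;
    n->next->prev = n->prev;
}

static void list_add(struct node *n, struct node *h)   /* add at head */
{
    n->next = h->next;
    n->prev = h;
    h->next->prev = n;
    h->next = n;
}

int main(void)
{
    struct node pinned, pages[6];
    struct node *n;
    int i;

    list_init(&pinned);
    for ( i = 0; i < 6; i++ )       /* pinned list ends up as 5 4 3 2 1 0 */
    {
        pages[i].id = i;
        list_add(&pages[i], &pinned);
    }

    /* "Re-pin" the three-page shadow made of pages 0..2: take each page
     * out of the pinned list, then re-add the fragment at the head,
     * last page first, so it keeps its internal order - the same shape
     * as the loops in the reworked sh_pin(). */
    for ( i = 0; i < 3; i++ )
        list_del(&pages[i]);
    for ( i = 3; i > 0; i-- )
        list_add(&pages[i - 1], &pinned);

    for ( n = pinned.next; n != &pinned; n = n->next )
        printf("%d ", n->id);
    printf("\n");                   /* prints: 0 1 2 5 4 3 */
    return 0;
}

Delete and add-at-head are the only primitives the loop needs, so the same
code works whichever representation backs the list.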
Signed-off-by: Tim Deegan <tim@xxxxxxx>
---
xen/arch/x86/mm/shadow/common.c | 42 +++--------
xen/arch/x86/mm/shadow/multi.c | 14 ++--
xen/arch/x86/mm/shadow/private.h | 151 ++++++++++++++++++++++-----------------
xen/include/xen/mm.h | 9 +++
4 files changed, 114 insertions(+), 102 deletions(-)
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 61110bb..12a86c2 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1220,33 +1220,6 @@ static unsigned int shadow_min_acceptable_pages(struct domain *d)
return (vcpu_count * 128);
}
-/* Figure out the size (in pages) of a given shadow type */
-static inline u32
-shadow_size(unsigned int shadow_type)
-{
- static const u32 type_to_size[SH_type_unused] = {
- 1, /* SH_type_none */
- 2, /* SH_type_l1_32_shadow */
- 2, /* SH_type_fl1_32_shadow */
- 4, /* SH_type_l2_32_shadow */
- 1, /* SH_type_l1_pae_shadow */
- 1, /* SH_type_fl1_pae_shadow */
- 1, /* SH_type_l2_pae_shadow */
- 1, /* SH_type_l2h_pae_shadow */
- 1, /* SH_type_l1_64_shadow */
- 1, /* SH_type_fl1_64_shadow */
- 1, /* SH_type_l2_64_shadow */
- 1, /* SH_type_l2h_64_shadow */
- 1, /* SH_type_l3_64_shadow */
- 1, /* SH_type_l4_64_shadow */
- 1, /* SH_type_p2m_table */
- 1, /* SH_type_monitor_table */
- 1 /* SH_type_oos_snapshot */
- };
- ASSERT(shadow_type < SH_type_unused);
- return type_to_size[shadow_type];
-}
-
/* Dispatcher function: call the per-mode function that will unhook the
* non-Xen mappings in this top-level shadow mfn. With user_only == 1,
* unhooks only the user-mode mappings. */
@@ -1489,9 +1462,6 @@ mfn_t shadow_alloc(struct domain *d,
break;
}
- /* Page lists don't have pointers back to the head structure, so
- * it's safe to use a head structure on the stack to link the pages
- * together. */
INIT_PAGE_LIST_HEAD(&tmp_list);
/* Init page info fields and clear the pages */
@@ -1525,6 +1495,14 @@ mfn_t shadow_alloc(struct domain *d,
if ( shadow_type >= SH_type_min_shadow
&& shadow_type <= SH_type_max_shadow )
sp->u.sh.head = 1;
+
+#ifndef PAGE_LIST_NULL
+ /* The temporary list-head is on our stack. Blank out the
+ * pointers to it in the shadows, just to get a clean failure if
+ * we accidentally follow them. */
+ tmp_list.next->prev = tmp_list.prev->next = NULL;
+#endif
+
return page_to_mfn(sp);
}
@@ -1533,6 +1511,7 @@ mfn_t shadow_alloc(struct domain *d,
void shadow_free(struct domain *d, mfn_t smfn)
{
struct page_info *next = NULL, *sp = mfn_to_page(smfn);
+ struct page_list_head *pin_list;
unsigned int pages;
u32 shadow_type;
int i;
@@ -1544,6 +1523,7 @@ void shadow_free(struct domain *d, mfn_t smfn)
ASSERT(shadow_type != SH_type_none);
ASSERT(sp->u.sh.head || (shadow_type > SH_type_max_shadow));
pages = shadow_size(shadow_type);
+ pin_list = &d->arch.paging.shadow.pinned_shadows;
for ( i = 0; i < pages; i++ )
{
@@ -1564,7 +1544,7 @@ void shadow_free(struct domain *d, mfn_t smfn)
#endif
/* Get the next page before we overwrite the list header */
if ( i < pages - 1 )
- next = pdx_to_page(sp->list.next);
+ next = page_list_next(sp, pin_list);
/* Strip out the type: this is now a free shadow page */
sp->u.sh.type = sp->u.sh.head = 0;
/* Remember the TLB timestamp so we will know whether to flush
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 65815bb..5fc10c9 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -428,20 +428,20 @@ sh_guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
/* From one page of a multi-page shadow, find the next one */
static inline mfn_t sh_next_page(mfn_t smfn)
{
- mfn_t next;
- struct page_info *pg = mfn_to_page(smfn);
+ struct page_info *pg = mfn_to_page(smfn), *next;
+ struct page_list_head h = PAGE_LIST_HEAD_INIT(h);
ASSERT(pg->u.sh.type == SH_type_l1_32_shadow
|| pg->u.sh.type == SH_type_fl1_32_shadow
|| pg->u.sh.type == SH_type_l2_32_shadow);
ASSERT(pg->u.sh.type == SH_type_l2_32_shadow || pg->u.sh.head);
- ASSERT(pg->list.next != PAGE_LIST_NULL);
- next = _mfn(pdx_to_pfn(pg->list.next));
+ next = page_list_next(pg, &h);
- ASSERT(mfn_to_page(next)->u.sh.type == pg->u.sh.type);
- ASSERT(!mfn_to_page(next)->u.sh.head);
- return next;
+ ASSERT(next);
+ ASSERT(next->u.sh.type == pg->u.sh.type);
+ ASSERT(!next->u.sh.head);
+ return page_to_mfn(next);
}
static inline u32
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index b778fcf..b145cee 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -318,6 +318,33 @@ static inline int mfn_oos_may_write(mfn_t gmfn)
}
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+/* Figure out the size (in pages) of a given shadow type */
+static inline u32
+shadow_size(unsigned int shadow_type)
+{
+ static const u32 type_to_size[SH_type_unused] = {
+ 1, /* SH_type_none */
+ 2, /* SH_type_l1_32_shadow */
+ 2, /* SH_type_fl1_32_shadow */
+ 4, /* SH_type_l2_32_shadow */
+ 1, /* SH_type_l1_pae_shadow */
+ 1, /* SH_type_fl1_pae_shadow */
+ 1, /* SH_type_l2_pae_shadow */
+ 1, /* SH_type_l2h_pae_shadow */
+ 1, /* SH_type_l1_64_shadow */
+ 1, /* SH_type_fl1_64_shadow */
+ 1, /* SH_type_l2_64_shadow */
+ 1, /* SH_type_l2h_64_shadow */
+ 1, /* SH_type_l3_64_shadow */
+ 1, /* SH_type_l4_64_shadow */
+ 1, /* SH_type_p2m_table */
+ 1, /* SH_type_monitor_table */
+ 1 /* SH_type_oos_snapshot */
+ };
+ ASSERT(shadow_type < SH_type_unused);
+ return type_to_size[shadow_type];
+}
+
/******************************************************************************
* Various function declarations
*/
@@ -586,22 +613,25 @@ prev_pinned_shadow(const struct page_info *page, const struct domain *d)
{
struct page_info *p;
+ const struct page_list_head *pin_list;
+
+ pin_list = &d->arch.paging.shadow.pinned_shadows;
- if ( page == d->arch.paging.shadow.pinned_shadows.next )
+ if ( page_list_empty(pin_list) || page == page_list_first(pin_list) )
return NULL;
-
+
if ( page == NULL ) /* If no current place, start at the tail */
- p = d->arch.paging.shadow.pinned_shadows.tail;
+ p = page_list_last(pin_list);
else
- p = pdx_to_page(page->list.prev);
+ p = page_list_prev(page, pin_list);
/* Skip over the non-tail parts of multi-page shadows */
if ( p && p->u.sh.type == SH_type_l2_32_shadow )
{
- p = pdx_to_page(p->list.prev);
+ p = page_list_prev(p, pin_list);
ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
- p = pdx_to_page(p->list.prev);
+ p = page_list_prev(p, pin_list);
ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
- p = pdx_to_page(p->list.prev);
+ p = page_list_prev(p, pin_list);
ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
}
ASSERT(!p || p->u.sh.head);
@@ -618,49 +648,48 @@ prev_pinned_shadow(const struct page_info *page,
* Returns 0 for failure, 1 for success. */
static inline int sh_pin(struct vcpu *v, mfn_t smfn)
{
- struct page_info *sp;
- struct page_list_head h, *pin_list;
-
+ struct page_info *sp[4];
+ struct page_list_head *pin_list;
+ unsigned int i, pages;
+ bool_t already_pinned;
+
ASSERT(mfn_valid(smfn));
- sp = mfn_to_page(smfn);
- ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
- ASSERT(sp->u.sh.head);
+ sp[0] = mfn_to_page(smfn);
+ pages = shadow_size(sp[0]->u.sh.type);
+ already_pinned = sp[0]->u.sh.pinned;
+ ASSERT(sh_type_is_pinnable(v, sp[0]->u.sh.type));
+ ASSERT(sp[0]->u.sh.head);
+
+ pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+ if ( already_pinned && sp[0] == page_list_first(pin_list) )
+ return 1;
/* Treat the up-to-four pages of the shadow as a unit in the list ops */
- h.next = h.tail = sp;
- if ( sp->u.sh.type == SH_type_l2_32_shadow )
+ for ( i = 1; i < pages; i++ )
{
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow);
+ sp[i] = page_list_next(sp[i - 1], pin_list);
+ ASSERT(sp[i]->u.sh.type == sp[0]->u.sh.type);
+ ASSERT(!sp[i]->u.sh.head);
}
- pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
- if ( sp->u.sh.pinned )
+ if ( already_pinned )
{
- /* Already pinned: take it out of the pinned-list so it can go
- * at the front */
- if ( pin_list->next == h.next )
- return 1;
- page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
- if ( pin_list->tail == h.tail )
- pin_list->tail = page_list_prev(h.next, pin_list);
- else
- page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
- h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
+ /* Take it out of the pinned-list so it can go at the front */
+ for ( i = 0; i < pages; i++ )
+ page_list_del(sp[i], pin_list);
}
else
{
/* Not pinned: pin it! */
if ( !sh_get_ref(v, smfn, 0) )
return 0;
- sp->u.sh.pinned = 1;
- ASSERT(h.next->list.prev == PAGE_LIST_NULL);
- ASSERT(h.tail->list.next == PAGE_LIST_NULL);
+ sp[0]->u.sh.pinned = 1;
}
+
/* Put it at the head of the list of pinned shadows */
- page_list_splice(&h, pin_list);
+ for ( i = pages; i > 0; i-- )
+ page_list_add(sp[i - 1], pin_list);
+
return 1;
}
@@ -668,46 +697,40 @@ static inline int sh_pin(struct vcpu *v, mfn_t smfn)
* of pinned shadows, and release the extra ref. */
static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
{
- struct page_list_head h, *pin_list;
- struct page_info *sp;
-
+ struct page_list_head tmp_list, *pin_list;
+ struct page_info *sp, *next;
+ unsigned int i, head_type;
+
ASSERT(mfn_valid(smfn));
sp = mfn_to_page(smfn);
+ head_type = sp->u.sh.type;
ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
ASSERT(sp->u.sh.head);
- /* Treat the up-to-four pages of the shadow as a unit in the list ops */
- h.next = h.tail = sp;
- if ( sp->u.sh.type == SH_type_l2_32_shadow )
- {
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- h.tail = pdx_to_page(h.tail->list.next);
- ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow);
- }
- pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
-
if ( !sp->u.sh.pinned )
return;
-
sp->u.sh.pinned = 0;
- /* Cut the sub-list out of the list of pinned shadows */
- if ( pin_list->next == h.next && pin_list->tail == h.tail )
- pin_list->next = pin_list->tail = NULL;
- else
+ /* Cut the sub-list out of the list of pinned shadows,
+ * stitching it back into a list fragment of its own. */
+ pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+ INIT_PAGE_LIST_HEAD(&tmp_list);
+ for ( i = 0; i < shadow_size(head_type); i++ )
{
- if ( pin_list->next == h.next )
- pin_list->next = page_list_next(h.tail, pin_list);
- else
- page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
- if ( pin_list->tail == h.tail )
- pin_list->tail = page_list_prev(h.next, pin_list);
- else
- page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
+ ASSERT(sp->u.sh.type == head_type);
+ ASSERT(!i || !sp->u.sh.head);
+ next = page_list_next(sp, pin_list);
+ page_list_del(sp, pin_list);
+ page_list_add_tail(sp, &tmp_list);
+ sp = next;
}
- h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
-
+#ifndef PAGE_LIST_NULL
+ /* The temporary list-head is on our stack. Blank out the
+ * pointers to it in the shadows, just to get a clean failure if
+ * we accidentally follow them. */
+ tmp_list.next->prev = tmp_list.prev->next = NULL;
+#endif
+
sh_put_ref(v, smfn, 0);
}
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 74a65a6..a62ee1e 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -172,6 +172,11 @@ page_list_first(const struct page_list_head *head)
return head->next;
}
static inline struct page_info *
+page_list_last(const struct page_list_head *head)
+{
+ return head->tail;
+}
+static inline struct page_info *
page_list_next(const struct page_info *page,
const struct page_list_head *head)
{
@@ -331,8 +336,12 @@ page_list_splice(struct page_list_head *list, struct page_list_head *head)
# define page_list_empty list_empty
# define page_list_first(hd) list_entry((hd)->next, \
struct page_info, list)
+# define page_list_last(hd) list_entry((hd)->prev, \
+ struct page_info, list)
# define page_list_next(pg, hd) list_entry((pg)->list.next, \
struct page_info, list)
+# define page_list_prev(pg, hd) list_entry((pg)->list.prev, \
+ struct page_info, list)
# define page_list_add(pg, hd) list_add(&(pg)->list, hd)
# define page_list_add_tail(pg, hd) list_add_tail(&(pg)->list, hd)
# define page_list_del(pg, hd) list_del(&(pg)->list)
--
2.1.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel