Re: [Xen-devel] RFC: AMD support for paging
> -----Original Message-----
> From: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
> [mailto:xen-devel-bounces@xxxxxxxxxxxxxxxxxxx] On Behalf Of Andres
> Lagar-Cavilla
> Sent: Wednesday, February 15, 2012 3:05 AM
> To: xen-devel@xxxxxxxxxxxxxxxxxxx
> Cc: olaf@xxxxxxxxx; keir.xen@xxxxxxxxx; tim@xxxxxxx; JBeulich@xxxxxxxx;
> adin@xxxxxxxxxxxxxx
> Subject: [Xen-devel] RFC: AMD support for paging
>
> We started hashing out some AMD support for mem_paging and mem_access.
> Right now my VMs boot, page out a bit, and then die on an HVM triple
> fault.
>
> Most importantly, I want to get somebody from AMD to comment on and help
> out with this. It feels like we're inches away from enabling support for
> this very nice feature. I'm not sure who exactly at AMD monitors the list
> for these kinds of things. It'd be great to have you on board!
>
> For starters, the changes to the p2m code are relatively mild, but it'd
> be great if somebody could spot any red flags.
>
> Another issue: comments indicate that bits 59-62 in NPT entries are used
> for IOMMU flags, but effectively only bits 61-62 are. Repossessing one
> bit (59) would give us enough space to support mem_access. Right now we
> only have 7 bits available for Xen flags, and that is not enough for
> paging and access together. Is bit 59 effectively reserved?
>
> Finally, the triple fault. Maybe I'm missing something obvious. Comments
> welcome.
>
> Patch inline below, thanks!
> Andres
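To keep the bit budget straight while weighing the bit-59 question, here is
a sketch of the 64-bit leaf-entry layout as I understand it from the
comments in p2m-pt.c. The macro names below are invented for illustration
only, and whether bit 59 is truly unused by the IOMMU code is exactly the
open question above:

/* Illustrative sketch of the software-available bits in a 64-bit
 * p2m/NPT leaf entry -- names are made up, not from the tree. */
#define SKETCH_P2M_TYPE_SHIFT  52             /* bits 52-58: 7-bit p2m type */
#define SKETCH_P2M_TYPE_MASK   (0x7fUL << SKETCH_P2M_TYPE_SHIFT)
#define SKETCH_IOMMU_NOMINAL   (0xfUL << 59)  /* bits 59-62 per the comments */
#define SKETCH_IOMMU_ACTUAL    (0x3UL << 61)  /* bits 61-62 actually used */
#define SKETCH_CANDIDATE       (1UL << 59)    /* the bit asked about above */
#define SKETCH_NX              (1UL << 63)    /* no-execute */

If bit 59 really is free, mem_access state could live there without
shrinking the 7-bit type field.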
> Enable AMD support for paging.
>
> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
> Signed-off-by: Adin Scannell <adin@xxxxxxxxxxx>
>
> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/mem_event.c
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -537,10 +537,6 @@ int mem_event_domctl(struct domain *d, x
>          if ( !hap_enabled(d) )
>              break;
>
> -        /* Currently only EPT is supported */
> -        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
> -            break;
> -
>          rc = -EXDEV;
>          /* Disallow paging in a PoD guest */
>          if ( p2m->pod.entry_count )
> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/p2m-pt.c
> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -53,6 +53,20 @@
>  #define P2M_BASE_FLAGS \
>          (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
>
> +#ifdef __x86_64__
> +/* l1e_from_pfn is not designed to have INVALID_MFN stored. The 0xff..ff
> + * value tramples over the higher-order bits used for flags (NX, p2mt,
> + * etc.) This happens for paging entries. Thus we do this clip/unclip
> + * juggle for l1 entries only (no paging superpages!) */
> +#define EFF_MFN_WIDTH (PADDR_BITS-PAGE_SHIFT) /* 40 bits */
> +#define clipped_mfn(mfn) ((mfn) & ((1UL << EFF_MFN_WIDTH) - 1))
> +#define unclip_mfn(mfn)  ((clipped_mfn((mfn)) == INVALID_MFN) ? \
> +                          INVALID_MFN : (mfn))
> +#else
> +#define clipped_mfn(mfn) (mfn)
> +#define unclip_mfn(mfn)  (mfn)
> +#endif /* __x86_64__ */
> +
>  static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
>  {
>      unsigned long flags;
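As a sanity check on the clip/unclip trick above, here is a tiny standalone
program (not Xen code; PADDR_BITS and PAGE_SHIFT are hard-coded to the
values the comment assumes) showing how an unclipped INVALID_MFN spills
into the flag bits, and that the sentinel survives a round trip when it is
clipped. Note that it compares against the *clipped* sentinel pattern,
since a 40-bit value can never equal the full-width ~0UL:

#include <stdio.h>

#define PADDR_BITS     52
#define PAGE_SHIFT     12
#define EFF_MFN_WIDTH  (PADDR_BITS - PAGE_SHIFT)   /* 40 bits */
#define INVALID_MFN    (~0UL)

#define clipped_mfn(mfn) ((mfn) & ((1UL << EFF_MFN_WIDTH) - 1))
#define unclip_mfn(mfn)  ((clipped_mfn(mfn) == clipped_mfn(INVALID_MFN)) ? \
                          INVALID_MFN : (mfn))

int main(void)
{
    unsigned long raw = INVALID_MFN << PAGE_SHIFT;              /* trampled */
    unsigned long ok  = clipped_mfn(INVALID_MFN) << PAGE_SHIFT; /* clipped  */

    /* Unclipped, the frame field floods bits 52-63: the 7-bit p2m type
     * (52-58), the IOMMU bits (59-62) and NX (63) are all overwritten. */
    printf("unclipped entry: %#018lx  (bits 52-63: %#lx)\n", raw, raw >> 52);

    /* Clipped, bits 52-63 stay clear for p2m_type_to_flags() to fill. */
    printf("clipped entry:   %#018lx  (bits 52-63: %#lx)\n", ok, ok >> 52);

    /* Reading the frame field back still yields the INVALID_MFN sentinel. */
    printf("round trip ok:   %d\n",
           unclip_mfn(ok >> PAGE_SHIFT) == INVALID_MFN);
    return 0;
}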
> @@ -77,6 +91,9 @@ static unsigned long p2m_type_to_flags(p
>      case p2m_invalid:
>      case p2m_mmio_dm:
>      case p2m_populate_on_demand:
> +    case p2m_ram_paging_out:
> +    case p2m_ram_paged:
> +    case p2m_ram_paging_in:
>      default:
>          return flags;
>      case p2m_ram_ro:
> @@ -168,7 +185,7 @@ p2m_next_level(struct p2m_domain *p2m, m
>                                        shift, max)) )
>          return 0;
>
> -    /* PoD: Not present doesn't imply empty. */
> +    /* PoD/paging: Not present doesn't imply empty. */
>      if ( !l1e_get_flags(*p2m_entry) )
>      {
>          struct page_info *pg;
> @@ -384,8 +401,9 @@ p2m_set_entry(struct p2m_domain *p2m, un
>                                     0, L1_PAGETABLE_ENTRIES);
>          ASSERT(p2m_entry);
>
> -        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
> -            entry_content = l1e_from_pfn(mfn_x(mfn),
> +        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) ||
> +             (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) )
> +            entry_content = l1e_from_pfn(clipped_mfn(mfn_x(mfn)),
>                                           p2m_type_to_flags(p2mt, mfn));
>          else
>              entry_content = l1e_empty();
> @@ -393,7 +411,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
>          if ( entry_content.l1 != 0 )
>          {
>              p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
> -            old_mfn = l1e_get_pfn(*p2m_entry);
> +            old_mfn = unclip_mfn(l1e_get_pfn(*p2m_entry));
>          }
>          /* level 1 entry */
>          p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn,
>                               entry_content, 1);
> @@ -615,11 +633,12 @@ pod_retry_l1:
>                                sizeof(l1e));
>
>      if ( ret == 0 ) {
> +        unsigned long l1e_mfn = unclip_mfn(l1e_get_pfn(l1e));
>          p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
> -        ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
> +        ASSERT( (l1e_mfn != INVALID_MFN || !p2m_is_ram(p2mt)) ||
> +                (l1e_mfn == INVALID_MFN && p2m_is_paging(p2mt)) );
>
> -        if ( p2m_flags_to_type(l1e_get_flags(l1e))
> -             == p2m_populate_on_demand )
> +        if ( p2mt == p2m_populate_on_demand )
>          {
>              /* The read has succeeded, so we know that the mapping
>               * exists at this point. */
> @@ -641,7 +660,7 @@
>          }
>
>          if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
> -            mfn = _mfn(l1e_get_pfn(l1e));
> +            mfn = _mfn(l1e_mfn);
>          else
>              /* XXX see above */
>              p2mt = p2m_mmio_dm;
> @@ -783,18 +802,26 @@ pod_retry_l2:
>  pod_retry_l1:
>      if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
>      {
> +        p2m_type_t l1t = p2m_flags_to_type(l1e_get_flags(*l1e));
>          /* PoD: Try to populate */
> -        if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
> +        if ( l1t == p2m_populate_on_demand )
>          {
>              if ( q != p2m_query ) {
>                  if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) )
>                      goto pod_retry_l1;
>              } else
>                  *t = p2m_populate_on_demand;
> +        } else {
> +            if ( p2m_is_paging(l1t) )
> +            {
> +                *t = l1t;
> +                /* No need to unclip due to check below */
> +                mfn = _mfn(l1e_get_pfn(*l1e));
> +            }
>          }
>
>          unmap_domain_page(l1e);
> -        return _mfn(INVALID_MFN);
> +        return (l1t == p2m_ram_paging_out) ? mfn : _mfn(INVALID_MFN);
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This means that when p2mt is p2m_ram_paging_in, the returned mfn must be
INVALID_MFN; but in p2m_type_to_flags(), _PAGE_PRESENT is not set when the
type is p2m_ram_paging_in or p2m_ram_paged.

My idea is the following change, in the p2m_gfn_to_mfn function:

@@ -1370,6 +1379,7 @@
     paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
     l2_pgentry_t *l2e;
     l1_pgentry_t *l1e;
+    unsigned long flags;

     ASSERT(paging_mode_translate(d));

@@ -1455,7 +1465,8 @@
     l1e = map_domain_page(mfn_x(mfn));
     l1e += l1_table_offset(addr);
 pod_retry_l1:
-    if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
+    flags = l1e_get_flags(*l1e);
+    if ( ((flags & _PAGE_PRESENT) == 0) && !p2m_is_paging(p2m_flags_to_type(flags)) )
     {
         /* PoD: Try to populate */
         if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
         {
             if ( q != p2m_query ) {
                 if ( !p2m_pod_check_and_populate(p2m, gfn,
                                                  (l1_pgentry_t *)l1e,
                                                  PAGE_ORDER_4K, q) )
                     goto pod_retry_l1;
             } else
                 *t = p2m_populate_on_demand;
         }

         unmap_domain_page(l1e);
         return _mfn(INVALID_MFN);
     }

     mfn = _mfn(l1e_get_pfn(*l1e));
     *t = p2m_flags_to_type(l1e_get_flags(*l1e));
     unmap_domain_page(l1e);

>      }
>      mfn = _mfn(l1e_get_pfn(*l1e));
>      *t = p2m_flags_to_type(l1e_get_flags(*l1e));
> @@ -914,7 +941,7 @@ static void p2m_change_type_global(struc
>                  flags = l1e_get_flags(l1e[i1]);
>                  if ( p2m_flags_to_type(flags) != ot )
>                      continue;
> -                mfn = l1e_get_pfn(l1e[i1]);
> +                mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
>                  gfn = i1 + (i2 + (i3
>  #if CONFIG_PAGING_LEVELS >= 4
>                                    + (i4 * L3_PAGETABLE_ENTRIES)
> @@ -923,7 +950,7 @@ static void p2m_change_type_global(struc
>                                   * L2_PAGETABLE_ENTRIES) * L1_PAGETABLE_ENTRIES;
>                  /* create a new 1le entry with the new type */
>                  flags = p2m_type_to_flags(nt, _mfn(mfn));
> -                l1e_content = l1e_from_pfn(mfn, flags);
> +                l1e_content = l1e_from_pfn(clipped_mfn(mfn), flags);
>                  p2m->write_p2m_entry(p2m, gfn, &l1e[i1],
>                                       l1mfn, l1e_content, 1);
>              }
> @@ -1073,7 +1100,7 @@ long p2m_pt_audit_p2m(struct p2m_domain
>                          entry_count++;
>                          continue;
>                      }
> -                    mfn = l1e_get_pfn(l1e[i1]);
> +                    mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
>                      ASSERT(mfn_valid(_mfn(mfn)));
>                      m2pfn = get_gpfn_from_mfn(mfn);
>                      if ( m2pfn != gfn &&
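For completeness, here is how I would expect a lookup caller to react once
these paging types start coming back. This is only a sketch of the usual
pattern: gfn_to_mfn and p2m_mem_paging_populate are the helpers in this
area of the tree, but their exact signatures keep changing, so treat the
calls below as assumptions rather than a drop-in hunk.

/* Sketch only: helper signatures vary between trees. */
static int sketch_lookup_caller(struct p2m_domain *p2m, unsigned long gfn)
{
    p2m_type_t t;
    mfn_t mfn = gfn_to_mfn(p2m, gfn, &t);

    if ( p2m_is_paging(t) )
    {
        /* The gfn is paged out (or in transit): ask the pager to bring
         * it back, and make the caller retry later.  Only
         * p2m_ram_paging_out still has a usable frame behind it, which
         * is why the hunk above returns a real mfn for that type alone. */
        p2m_mem_paging_populate(p2m->domain, gfn);
        return -ENOENT;
    }

    if ( !mfn_valid(mfn) )
        return -EINVAL;

    /* ... the mfn is safe to use from here on ... */
    return 0;
}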