
[Xen-devel] [PATCH 7/8] x86/EPT: split super pages upon mismatching memory types



... between constituent pages. To indicate such, the page order is
passed down to the vMTRR routines, with a negative return value
(possible only for non-zero-order pages) indicating such a collision.
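
As a sketch of the resulting calling convention (the caller shown here
is hypothetical; get_mtrr_type() and the vMTRR state are as in the
hunks below):

    int mtype = get_mtrr_type(&v->arch.hvm_vcpu.mtrr, gpa, order);

    if ( mtype < 0 )
    {
        /* Memory types differ within the 2^order page range; the
         * mapping must be split before a single type can be used. */
    }
    else
    {
        /* mtype is one MTRR_TYPE_* value valid for the whole range. */
    }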

Some redundancy in ept_set_entry() is eliminated along the way,
allowing the new handling to be centralized in a single place there.

In order to keep ept_set_entry() fast and simple, the actual splitting
is being deferred to the EPT_MISCONFIG VM exit handler.
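
Concretely, the deferral amounts to writing an EMT value outside the
range the hardware accepts (MTRR_NUM_TYPES), so the next access to the
range faults with EPT_MISCONFIG and the exit handler can do the actual
splitting. Condensed from the ept_set_entry() hunk below:

    int emt = epte_get_entry_emt(p2m->domain, gfn, mfn,
                                 i * EPT_TABLE_ORDER, &ipat, direct_mmio);

    if ( emt >= 0 )
        new_entry.emt = emt;
    else /* ept_handle_misconfig() will need to take care of this. */
        new_entry.emt = MTRR_NUM_TYPES;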

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
One open question is whether it is acceptable for a memory allocation
failure (for an intermediate page table) to be fatal to the domain. If
it isn't, then deferring the splitting work from ept_set_entry() to
ept_handle_misconfig() is not an option. But even with that eliminated,
there would still be the potential for ept_handle_misconfig() needing
to split pages, and it would then have to be determined how to handle
that gracefully.
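
For reference, the failure path as currently written (condensed from
the ept_handle_misconfig() hunk below): when splitting fails for lack
of memory, the entry is freed and the VM exit is left unhandled, which
is fatal to the domain:

    if ( ept_split_super_page(p2m, &e, level, level - 1) )
    {
        mfn = e.mfn;   /* re-walk into the newly built page table */
        continue;
    }
    /* Page table allocation failed: drop the mapping ... */
    ept_free_entry(p2m, &e, level);
    okay = 0;          /* ... and fail the EPT_MISCONFIG VM exit. */
    break;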

--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -222,30 +222,40 @@ void hvm_vcpu_cacheattr_destroy(struct v
 
 /*
  * Get MTRR memory type for physical address pa.
+ *
+ * May return a negative value when order > 0, indicating to the caller
+ * that the respective mapping needs splitting.
  */
-static uint8_t get_mtrr_type(struct mtrr_state *m, paddr_t pa)
+static int get_mtrr_type(const struct mtrr_state *m,
+                         paddr_t pa, unsigned int order)
 {
-   int32_t     addr, seg, index;
    uint8_t     overlap_mtrr = 0;
    uint8_t     overlap_mtrr_pos = 0;
-   uint64_t    phys_base;
-   uint64_t    phys_mask;
-   uint8_t     num_var_ranges = m->mtrr_cap & 0xff;
+   uint64_t    mask = -(uint64_t)PAGE_SIZE << order;
+   unsigned int seg, num_var_ranges = m->mtrr_cap & 0xff;
 
    if ( unlikely(!(m->enabled & 0x2)) )
        return MTRR_TYPE_UNCACHABLE;
 
+   pa &= mask;
    if ( (pa < 0x100000) && (m->enabled & 1) )
    {
-       /* Fixed range MTRR takes effective */
-       addr = (uint32_t) pa;
+       /* Fixed range MTRR takes effect. */
+       uint32_t addr = (uint32_t)pa, index;
+
        if ( addr < 0x80000 )
        {
+           /* 0x00000 ... 0x7FFFF in 64k steps */
+           if ( order > 4 )
+               return -1;
            seg = (addr >> 16);
            return m->fixed_ranges[seg];
        }
        else if ( addr < 0xc0000 )
        {
+           /* 0x80000 ... 0xBFFFF in 16k steps */
+           if ( order > 2 )
+               return -1;
            seg = (addr - 0x80000) >> 14;
            index = (seg >> 3) + 1;
            seg &= 7;            /* select 0-7 segments */
@@ -253,7 +263,9 @@ static uint8_t get_mtrr_type(struct mtrr
        }
        else
        {
-           /* 0xC0000 --- 0x100000 */
+           /* 0xC0000 ... 0xFFFFF in 4k steps */
+           if ( order )
+               return -1;
            seg = (addr - 0xc0000) >> 12;
            index = (seg >> 3) + 3;
            seg &= 7;            /* select 0-7 segments */
@@ -264,14 +276,15 @@ static uint8_t get_mtrr_type(struct mtrr
    /* Match with variable MTRRs. */
    for ( seg = 0; seg < num_var_ranges; seg++ )
    {
-       phys_base = ((uint64_t*)m->var_ranges)[seg*2];
-       phys_mask = ((uint64_t*)m->var_ranges)[seg*2 + 1];
+       uint64_t phys_base = m->var_ranges[seg].base;
+       uint64_t phys_mask = m->var_ranges[seg].mask;
+
        if ( phys_mask & MTRR_PHYSMASK_VALID )
        {
-           if ( ((uint64_t) pa & phys_mask) >> MTRR_PHYSMASK_SHIFT ==
-                (phys_base & phys_mask) >> MTRR_PHYSMASK_SHIFT )
+           phys_mask &= mask;
+           if ( (pa & phys_mask) == (phys_base & phys_mask) )
            {
-               if ( unlikely(m->overlapped) )
+               if ( unlikely(m->overlapped) || order )
                {
                     overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK);
                     overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK;
@@ -285,23 +298,24 @@ static uint8_t get_mtrr_type(struct mtrr
        }
    }
 
-   /* Overlapped or not found. */
+   /* Not found? */
    if ( unlikely(overlap_mtrr == 0) )
        return m->def_type;
 
-   if ( likely(!(overlap_mtrr & ~( ((uint8_t)1) << overlap_mtrr_pos ))) )
-       /* Covers both one variable memory range matches and
-        * two or more identical match.
-        */
+   /* One match, or multiple identical ones? */
+   if ( likely(overlap_mtrr == (1 << overlap_mtrr_pos)) )
        return overlap_mtrr_pos;
 
+   if ( order )
+       return -1;
+
+   /* Two or more matches, one being UC? */
    if ( overlap_mtrr & (1 << MTRR_TYPE_UNCACHABLE) )
-       /* Two or more match, one is UC. */
        return MTRR_TYPE_UNCACHABLE;
 
-   if ( !(overlap_mtrr &
-          ~((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK))) )
-       /* Two or more match, WT and WB. */
+   /* Two or more matches, all of them WT and WB? */
+   if ( overlap_mtrr ==
+        ((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK)) )
        return MTRR_TYPE_WRTHROUGH;
 
    /* Behaviour is undefined, but return the last overlapped type. */
@@ -341,7 +355,7 @@ static uint8_t effective_mm_type(struct 
      * just use it
      */ 
     if ( gmtrr_mtype == NO_HARDCODE_MEM_TYPE )
-        mtrr_mtype = get_mtrr_type(m, gpa);
+        mtrr_mtype = get_mtrr_type(m, gpa, 0);
     else
         mtrr_mtype = gmtrr_mtype;
 
@@ -370,7 +384,7 @@ uint32_t get_pat_flags(struct vcpu *v,
     guest_eff_mm_type = effective_mm_type(g, pat, gpaddr, 
                                           gl1e_flags, gmtrr_mtype);
     /* 2. Get the memory type of host physical address, with MTRR */
-    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr);
+    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr, 0);
 
     /* 3. Find the memory type in PAT, with host MTRR memory type
      * and guest effective memory type.
@@ -703,10 +717,10 @@ void memory_type_changed(struct domain *
         p2m_memory_type_changed(d);
 }
 
-uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
-                           uint8_t *ipat, bool_t direct_mmio)
+int epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
+                       unsigned int order, uint8_t *ipat, bool_t direct_mmio)
 {
-    uint8_t gmtrr_mtype, hmtrr_mtype;
+    int gmtrr_mtype, hmtrr_mtype;
     uint32_t type;
     struct vcpu *v = current;
 
@@ -747,10 +761,12 @@ uint8_t epte_get_entry_emt(struct domain
     }
 
     gmtrr_mtype = is_hvm_domain(d) && v ?
-                  get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) :
+                  get_mtrr_type(&v->arch.hvm_vcpu.mtrr,
+                                gfn << PAGE_SHIFT, order) :
                   MTRR_TYPE_WRBACK;
-
-    hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn_x(mfn) << PAGE_SHIFT));
+    hmtrr_mtype = get_mtrr_type(&mtrr_state, mfn_x(mfn) << PAGE_SHIFT, order);
+    if ( gmtrr_mtype < 0 || hmtrr_mtype < 0 )
+        return -1;
 
     /* If both types match we're fine. */
     if ( likely(gmtrr_mtype == hmtrr_mtype) )
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -289,6 +289,7 @@ ept_set_entry(struct p2m_domain *p2m, un
     int vtd_pte_present = 0;
     int needs_sync = 1;
     ept_entry_t old_entry = { .epte = 0 };
+    ept_entry_t new_entry = { .epte = 0 };
     struct ept_data *ept = &p2m->ept;
     struct domain *d = p2m->domain;
 
@@ -338,7 +339,6 @@ ept_set_entry(struct p2m_domain *p2m, un
     if ( i == target )
     {
         /* We reached the target level. */
-        ept_entry_t new_entry = { .epte = 0 };
 
         /* No need to flush if the old entry wasn't valid */
         if ( !is_epte_present(ept_entry) )
@@ -349,35 +349,11 @@ ept_set_entry(struct p2m_domain *p2m, un
          *
          * Read-then-write is OK because we hold the p2m lock. */
         old_entry = *ept_entry;
-
-        if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
-             (p2mt == p2m_ram_paging_in) )
-        {
-            /* Construct the new entry, and then write it once */
-            new_entry.emt = epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat,
-                                                direct_mmio);
-
-            new_entry.ipat = ipat;
-            new_entry.sp = !!order;
-            new_entry.sa_p2mt = p2mt;
-            new_entry.access = p2ma;
-            new_entry.rsvd2_snp = (iommu_enabled && iommu_snoop);
-
-            new_entry.mfn = mfn_x(mfn);
-
-            if ( old_entry.mfn == new_entry.mfn )
-                need_modify_vtd_table = 0;
-
-            ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
-        }
-
-        atomic_write_ept_entry(ept_entry, new_entry);
     }
     else
     {
         /* We need to split the original page. */
         ept_entry_t split_ept_entry;
-        ept_entry_t new_entry = { .epte = 0 };
 
         ASSERT(is_epte_superpage(ept_entry));
 
@@ -401,8 +377,19 @@ ept_set_entry(struct p2m_domain *p2m, un
         ASSERT(i == target);
 
         ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
+    }
+
+    if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
+         (p2mt == p2m_ram_paging_in) )
+    {
+        int emt = epte_get_entry_emt(p2m->domain, gfn, mfn,
+                                     i * EPT_TABLE_ORDER, &ipat, direct_mmio);
+
+        if ( emt >= 0 )
+            new_entry.emt = emt;
+        else /* ept_handle_misconfig() will need to take care of this. */
+            new_entry.emt = MTRR_NUM_TYPES;
 
-        new_entry.emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
         new_entry.ipat = ipat;
         new_entry.sp = !!i;
         new_entry.sa_p2mt = p2mt;
@@ -417,10 +404,10 @@ ept_set_entry(struct p2m_domain *p2m, un
              need_modify_vtd_table = 0;
 
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
-
-        atomic_write_ept_entry(ept_entry, new_entry);
     }
 
+    atomic_write_ept_entry(ept_entry, new_entry);
+
     /* Track the highest gfn for which we have ever had a valid mapping */
     if ( p2mt != p2m_invalid &&
          (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
@@ -737,7 +724,7 @@ bool_t ept_handle_misconfig(uint64_t gpa
                     if ( !is_epte_valid(&e) || !is_epte_present(&e) )
                         continue;
                     e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
-                                               _mfn(e.mfn), &ipat,
+                                               _mfn(e.mfn), 0, &ipat,
                                                e.sa_p2mt == p2m_mmio_direct);
                     e.ipat = ipat;
                     atomic_write_ept_entry(&epte[i], e);
@@ -745,9 +732,22 @@ bool_t ept_handle_misconfig(uint64_t gpa
             }
             else
             {
-                e.emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
-                                           &ipat,
-                                           e.sa_p2mt == p2m_mmio_direct);
+                int emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
+                                             level * EPT_TABLE_ORDER, &ipat,
+                                             e.sa_p2mt == p2m_mmio_direct);
+                if ( unlikely(emt < 0) )
+                {
+                    unmap_domain_page(epte);
+                    if ( ept_split_super_page(p2m, &e, level, level - 1) )
+                    {
+                        mfn = e.mfn;
+                        continue;
+                    }
+                    ept_free_entry(p2m, &e, level);
+                    okay = 0;
+                    break;
+                }
+                e.emt = emt;
                 e.ipat = ipat;
                 atomic_write_ept_entry(&epte[i], e);
             }
--- a/xen/include/asm-x86/mtrr.h
+++ b/xen/include/asm-x86/mtrr.h
@@ -72,8 +72,9 @@ extern int mtrr_del_page(int reg, unsign
 extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
 extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr,
                   paddr_t spaddr, uint8_t gmtrr_mtype);
-extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn,
-                                  mfn_t mfn, uint8_t *ipat, bool_t direct_mmio);
+extern int epte_get_entry_emt(struct domain *, unsigned long gfn, mfn_t mfn,
+                              unsigned int order, uint8_t *ipat,
+                              bool_t direct_mmio);
 extern void ept_change_entry_emt_with_range(
     struct domain *d, unsigned long start_gfn, unsigned long end_gfn);
 extern unsigned char pat_type_2_pte_flags(unsigned char pat_type);

