[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] x86/EPT: force re-evaluation of memory type as necessary



commit aa9114edd97b292cd89b3616e3f2089471fd2201
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Thu Apr 10 16:01:41 2014 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Apr 10 16:01:41 2014 +0200

    x86/EPT: force re-evaluation of memory type as necessary
    
    The main goal here is to drop the bogus dependency of
    epte_get_entry_emt() on d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT].
    
    Any change to state influencing epte_get_entry_emt()'s decision needs
    to result in re-calculation. Do this by using the EPT_MISCONFIG VM
    exit, storing an invalid memory type into EPT's emt field (leaving the
    IOMMU, which doesn't care about memory types, unaffected).
    
    This is being done in a hierarchical manner to keep execution time
    down: Initially only the top level directory gets invalidated this way.
    Upon access, the involved intermediate page table levels get cleared
    back to zero, and the leaf entry gets its field properly set. For 4k
    leaves all other entries in the same directory also get processed to
    amortize the cost of the extra VM exit (which halved the number of
    these VM exits in my testing).
    
    This restoring can result in spurious EPT_MISCONFIG VM exits (since
    two vCPU-s may access addresses involving identical page table
    structures). Rather than simply returning in such cases (and risking
    that such a VM exit results from a real mis-configuration, which
    would then result in an endless loop rather than killing the VM), a
    per-vCPU flag is being introduced indicating when such a spurious VM
    exit might validly happen - if another one occurs right after VM re-
    entry, the flag would generally end up being clear, causing the VM
    to be killed as before on such VM exits.
    
    Note that putting a reserved memory type value in the EPT structures
    isn't formally sanctioned by the specification. Intel isn't willing to
    adjust the specification to make this or a similar use of the
    EPT_MISCONFIG VM exit formally possible, but they have indicated that
    us using this is low risk wrt forward compatibility.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Tim Deegan <tim@xxxxxxx>
    Acked-by: Kevin Tian <kevin.tian@xxxxxxxxx>
---
 xen/arch/x86/domctl.c              |    6 ++
 xen/arch/x86/hvm/hvm.c             |    3 +
 xen/arch/x86/hvm/mtrr.c            |   20 ++++-
 xen/arch/x86/hvm/vmx/vmx.c         |   10 +++
 xen/arch/x86/mm/p2m-ept.c          |  139 +++++++++++++++++++++++++++++++++++-
 xen/arch/x86/mm/p2m.c              |   12 +++
 xen/common/domctl.c                |    4 +
 xen/include/asm-x86/hvm/vmx/vmcs.h |    3 +
 xen/include/asm-x86/hvm/vmx/vmx.h  |    1 +
 xen/include/asm-x86/mtrr.h         |    1 +
 xen/include/asm-x86/p2m.h          |    4 +
 11 files changed, 198 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 26635ff..d626e6f 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -83,6 +83,8 @@ long arch_do_domctl(
             ret = ioports_permit_access(d, fp, fp + np - 1);
         else
             ret = ioports_deny_access(d, fp, fp + np - 1);
+        if ( !ret )
+            memory_type_changed(d);
     }
     break;
 
@@ -706,6 +708,8 @@ long arch_do_domctl(
                        ret, add ? "removing" : "denying", d->domain_id,
                        mfn, mfn + nr_mfns - 1);
         }
+        /* Do this unconditionally to cover errors on above failure paths. */
+        memory_type_changed(d);
     }
     break;
 
@@ -792,6 +796,8 @@ long arch_do_domctl(
                        "ioport_map: error %ld denying dom%d access to 
[%x,%x]\n",
                        ret, d->domain_id, fmp, fmp + np - 1);
         }
+        if ( !ret )
+            memory_type_changed(d);
     }
     break;
 
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 8c3797e..46634cd 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -252,6 +252,9 @@ int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat)
 
     if ( !hvm_funcs.set_guest_pat(v, guest_pat) )
         v->arch.hvm_vcpu.pat_cr = guest_pat;
+
+    memory_type_changed(v->domain);
+
     return 1;
 }
 
diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
index 45c8e7b..ae24efa 100644
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -431,8 +431,12 @@ bool_t mtrr_def_type_msr_set(struct domain *d, struct 
mtrr_state *m,
          return 0;
     }
 
-    m->enabled = enabled;
-    m->def_type = def_type;
+    if ( m->enabled != enabled || m->def_type != def_type )
+    {
+        m->enabled = enabled;
+        m->def_type = def_type;
+        memory_type_changed(d);
+    }
 
     return 1;
 }
@@ -452,6 +456,7 @@ bool_t mtrr_fix_range_msr_set(struct domain *d, struct 
mtrr_state *m,
                 return 0;
 
         fixed_range_base[row] = msr_content;
+        memory_type_changed(d);
     }
 
     return 1;
@@ -496,6 +501,8 @@ bool_t mtrr_var_range_msr_set(
 
     m->overlapped = is_var_mtrr_overlapped(m);
 
+    memory_type_changed(d);
+
     return 1;
 }
 
@@ -690,6 +697,12 @@ static int hvm_load_mtrr_msr(struct domain *d, 
hvm_domain_context_t *h)
 HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr,
                           1, HVMSR_PER_VCPU);
 
+void memory_type_changed(struct domain *d)
+{
+    if ( iommu_enabled && !iommu_snoop && d->vcpu && d->vcpu[0] )
+        p2m_memory_type_changed(d);
+}
+
 uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
                            uint8_t *ipat, bool_t direct_mmio)
 {
@@ -733,8 +746,7 @@ uint8_t epte_get_entry_emt(struct domain *d, unsigned long 
gfn, mfn_t mfn,
         return MTRR_TYPE_WRBACK;
     }
 
-    gmtrr_mtype = is_hvm_domain(d) && v &&
-                  d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ?
+    gmtrr_mtype = is_hvm_domain(d) && v ?
                   get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) :
                   MTRR_TYPE_WRBACK;
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 77ce167..180cf6c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3016,6 +3016,16 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
         break;
     }
 
+    case EXIT_REASON_EPT_MISCONFIG:
+    {
+        paddr_t gpa;
+
+        __vmread(GUEST_PHYSICAL_ADDRESS, &gpa);
+        if ( !ept_handle_misconfig(gpa) )
+            goto exit_and_crash;
+        break;
+    }
+
     case EXIT_REASON_MONITOR_TRAP_FLAG:
         v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
         vmx_update_cpu_exec_control(v);
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 99a1084..342fc70 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -270,6 +270,125 @@ static int ept_next_level(struct p2m_domain *p2m, bool_t 
read_only,
     return GUEST_TABLE_NORMAL_PAGE;
 }
 
+static bool_t ept_invalidate_emt(mfn_t mfn)
+{
+    ept_entry_t *epte = map_domain_page(mfn_x(mfn));
+    unsigned int i;
+    bool_t changed = 0;
+
+    for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+    {
+        ept_entry_t e = atomic_read_ept_entry(&epte[i]);
+
+        if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
+             e.emt == MTRR_NUM_TYPES )
+            continue;
+
+        e.emt = MTRR_NUM_TYPES;
+        atomic_write_ept_entry(&epte[i], e);
+        changed = 1;
+    }
+
+    unmap_domain_page(epte);
+
+    return changed;
+}
+
+bool_t ept_handle_misconfig(uint64_t gpa)
+{
+    struct vcpu *curr = current;
+    struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
+    struct ept_data *ept = &p2m->ept;
+    unsigned int level = ept_get_wl(ept);
+    unsigned long gfn = PFN_DOWN(gpa);
+    unsigned long mfn = ept_get_asr(ept);
+    ept_entry_t *epte;
+    int okay;
+
+    if ( !mfn )
+        return 0;
+
+    p2m_lock(p2m);
+
+    okay = -curr->arch.hvm_vmx.ept_spurious_misconfig;
+    for ( ; ; --level )
+    {
+        ept_entry_t e;
+        unsigned int i;
+
+        epte = map_domain_page(mfn);
+        i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
+        e = atomic_read_ept_entry(&epte[i]);
+
+        if ( level == 0 || is_epte_superpage(&e) )
+        {
+            uint8_t ipat = 0;
+
+            if ( e.emt != MTRR_NUM_TYPES )
+                break;
+
+            if ( level == 0 )
+            {
+                for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
+                {
+                    e = atomic_read_ept_entry(&epte[i]);
+                    if ( e.emt == MTRR_NUM_TYPES )
+                        e.emt = 0;
+                    if ( !is_epte_valid(&e) || !is_epte_present(&e) )
+                        continue;
+                    e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
+                                               _mfn(e.mfn), &ipat,
+                                               e.sa_p2mt == p2m_mmio_direct);
+                    e.ipat = ipat;
+                    atomic_write_ept_entry(&epte[i], e);
+                }
+            }
+            else
+            {
+                e.emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
+                                           &ipat,
+                                           e.sa_p2mt == p2m_mmio_direct);
+                e.ipat = ipat;
+                atomic_write_ept_entry(&epte[i], e);
+            }
+
+            okay = 1;
+            break;
+        }
+
+        if ( e.emt == MTRR_NUM_TYPES )
+        {
+            ASSERT(is_epte_present(&e));
+            ept_invalidate_emt(_mfn(e.mfn));
+            smp_wmb();
+            e.emt = 0;
+            atomic_write_ept_entry(&epte[i], e);
+            unmap_domain_page(epte);
+            okay = 1;
+        }
+        else if ( is_epte_present(&e) && !e.emt )
+            unmap_domain_page(epte);
+        else
+            break;
+
+        mfn = e.mfn;
+    }
+
+    unmap_domain_page(epte);
+    if ( okay > 0 )
+    {
+        struct vcpu *v;
+
+        for_each_vcpu ( curr->domain, v )
+            v->arch.hvm_vmx.ept_spurious_misconfig = 1;
+    }
+    curr->arch.hvm_vmx.ept_spurious_misconfig = 0;
+    ept_sync_domain(p2m);
+    p2m_unlock(p2m);
+
+    return !!okay;
+}
+
 /*
  * ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
@@ -660,6 +779,17 @@ static void ept_change_entry_type_global(struct p2m_domain 
*p2m,
     ept_sync_domain(p2m);
 }
 
+static void ept_memory_type_changed(struct p2m_domain *p2m)
+{
+    unsigned long mfn = ept_get_asr(&p2m->ept);
+
+    if ( !mfn )
+        return;
+
+    if ( ept_invalidate_emt(_mfn(mfn)) )
+        ept_sync_domain(p2m);
+}
+
 static void __ept_sync_domain(void *info)
 {
     struct ept_data *ept = &((struct p2m_domain *)info)->ept;
@@ -697,6 +827,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
     p2m->set_entry = ept_set_entry;
     p2m->get_entry = ept_get_entry;
     p2m->change_entry_type_global = ept_change_entry_type_global;
+    p2m->memory_type_changed = ept_memory_type_changed;
     p2m->audit_p2m = NULL;
 
     /* Set the memory type used when accessing EPT paging structures. */
@@ -737,6 +868,7 @@ static void ept_dump_p2m_table(unsigned char key)
         [MTRR_TYPE_WRTHROUGH]      = "WT",
         [MTRR_TYPE_WRPROT]         = "WP",
         [MTRR_TYPE_WRBACK]         = "WB",
+        [MTRR_NUM_TYPES]           = "??"
     };
 
     for_each_domain(d)
@@ -750,11 +882,16 @@ static void ept_dump_p2m_table(unsigned char key)
 
         for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += 1UL << order )
         {
+            char c = 0;
+
             gfn_remainder = gfn;
             table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
 
             for ( i = ept_get_wl(ept); i > 0; i-- )
             {
+                ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
+                if ( ept_entry->emt == MTRR_NUM_TYPES )
+                    c = '?';
                 ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
                     break;
@@ -775,7 +912,7 @@ static void ept_dump_p2m_table(unsigned char key)
                            memory_types[ept_entry->emt][0],
                            memory_types[ept_entry->emt][1]
                            ?: ept_entry->emt + '0',
-                           ept_entry->ipat ? '!' : ' ');
+                           c ?: ept_entry->ipat ? '!' : ' ');
 
                 if ( !(record_counter++ % 100) )
                     process_pending_softirqs();
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index c38f334..d2de237 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -200,6 +200,18 @@ void p2m_change_entry_type_global(struct domain *d,
     p2m_unlock(p2m);
 }
 
+void p2m_memory_type_changed(struct domain *d)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+    if ( p2m->memory_type_changed )
+    {
+        p2m_lock(p2m);
+        p2m->memory_type_changed(p2m);
+        p2m_unlock(p2m);
+    }
+}
+
 mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
                     p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                     unsigned int *page_order, bool_t locked)
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 5342e5d..5e807ab 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -815,6 +815,10 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) 
u_domctl)
             ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
         else
             ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
+#ifdef CONFIG_X86
+        if ( !ret )
+            memory_type_changed(d);
+#endif
     }
     break;
 
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h 
b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 45dcfa1..445b39f 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -124,6 +124,9 @@ struct arch_vmx_struct {
 
     unsigned long        host_cr0;
 
+    /* Do we need to tolerate a spurious EPT_MISCONFIG VM exit? */
+    bool_t               ept_spurious_misconfig;
+
     /* Is the guest in real mode? */
     uint8_t              vmx_realmode;
     /* Are we emulating rather than VMENTERing? */
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h 
b/xen/include/asm-x86/hvm/vmx/vmx.h
index d4030990..2e8cd70 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -520,6 +520,7 @@ int ept_p2m_init(struct p2m_domain *p2m);
 void ept_p2m_uninit(struct p2m_domain *p2m);
 
 void ept_walk_table(struct domain *d, unsigned long gfn);
+bool_t ept_handle_misconfig(uint64_t gpa);
 void setup_ept_dump(void);
 
 void update_guest_eip(void);
diff --git a/xen/include/asm-x86/mtrr.h b/xen/include/asm-x86/mtrr.h
index 1b5993e..b3f238c 100644
--- a/xen/include/asm-x86/mtrr.h
+++ b/xen/include/asm-x86/mtrr.h
@@ -88,6 +88,7 @@ extern bool_t mtrr_fix_range_msr_set(struct domain *, struct 
mtrr_state *,
                                      uint32_t row, uint64_t msr_content);
 extern bool_t mtrr_def_type_msr_set(struct domain *, struct mtrr_state *,
                                     uint64_t msr_content);
+extern void memory_type_changed(struct domain *);
 extern bool_t pat_msr_set(uint64_t *pat, uint64_t msr);
 
 bool_t is_var_mtrr_overlapped(struct mtrr_state *m);
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index d644f82..c64f3d0 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -233,6 +233,7 @@ struct p2m_domain {
     void               (*change_entry_type_global)(struct p2m_domain *p2m,
                                                    p2m_type_t ot,
                                                    p2m_type_t nt);
+    void               (*memory_type_changed)(struct p2m_domain *p2m);
     
     void               (*write_p2m_entry)(struct p2m_domain *p2m,
                                           unsigned long gfn, l1_pgentry_t *p,
@@ -506,6 +507,9 @@ void p2m_change_type_range(struct domain *d,
 p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
                            p2m_type_t ot, p2m_type_t nt);
 
+/* Report a change affecting memory types. */
+void p2m_memory_type_changed(struct domain *d);
+
 /* Set mmio addresses in the p2m table (for pass-through) */
 int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.