
[Xen-devel] [PATCH v2 3/3] x86/mm-locks: apply a bias to lock levels for control domain



The paging_log_dirty_op function takes mm locks of a subject domain and
then attempts to perform copy-to operations against the caller domain
in order to copy the result of the hypercall into the caller-provided
buffer.

This works fine when the caller is a non-paging domain, but triggers a
lock order panic when the caller is a paging domain: at the point where
the copy-to operation is performed the subject domain's paging lock is
already held, and the copy operation requires taking the caller's p2m
lock, which has a lower level.

Fix this limitation by adding a bias to the level of control domain mm
locks, so that the lowest control domain mm lock always has a level
greater than the highest unprivileged domain lock level. This allows
taking the subject domain's mm locks and then the control domain's mm
locks, while keeping the same lock ordering and confining the changes
mostly to mm-locks.h.
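
The effect can be sketched with the order values used below; this is an
illustration only, not part of the patch, and the p2m order value (16)
is an assumption since it is not visible in the quoted hunks:

    /*
     * Sketch of the biased ordering check.  MM_LOCK_ORDER_MAX and
     * MM_LOCK_ORDER_paging (both 64) match the hunks below; the p2m
     * order (16) is assumed for illustration.
     */
    #include <assert.h>
    #include <stdbool.h>

    #define MM_LOCK_ORDER_MAX     64
    #define MM_LOCK_ORDER_p2m     16   /* assumed, not in the diff */
    #define MM_LOCK_ORDER_paging  64

    /* Level recorded/checked, with the control domain bias applied. */
    static int lock_level(bool is_control, int order)
    {
        return order + (is_control ? MM_LOCK_ORDER_MAX : 0);
    }

    int main(void)
    {
        /* The subject domain's paging lock is already held at level 64. */
        int held = lock_level(false, MM_LOCK_ORDER_paging);

        /* Unbiased, the caller's p2m lock is level 16: 64 > 16 trips BUG(). */
        assert(held > MM_LOCK_ORDER_p2m);

        /* Biased, a control domain caller gets 16 + 64 = 80: 64 > 80 is
         * false, so the order check passes and the copy can proceed. */
        assert(held < lock_level(true, MM_LOCK_ORDER_p2m));

        return 0;
    }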

Note that so far only this flow (taking a subject domain's locks and
then the control domain's ones) has been identified, but not all
possible code paths have been inspected. Hence this solution attempts
to be a non-intrusive fix for the problem at hand, without ruling out
further changes in the future if other valid code paths are found that
require a more complex lock level ordering.

Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
Cc: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>
---
Changes since v1:
 - Boost only control domain mm lock levels instead of the caller.
---
 xen/arch/x86/mm/mm-locks.h | 119 +++++++++++++++++++++++--------------
 xen/arch/x86/mm/p2m-pod.c  |   5 +-
 2 files changed, 78 insertions(+), 46 deletions(-)

diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
index d3497713e9..d6c073dc5c 100644
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -50,15 +50,35 @@ static inline int _get_lock_level(void)
     return this_cpu(mm_lock_level);
 }
 
+#define MM_LOCK_ORDER_MAX                    64
+/*
+ * Return the lock level taking the domain bias into account. If the domain is
+ * privileged a bias of MM_LOCK_ORDER_MAX is applied to the lock level, so that
+ * mm locks that belong to a control domain can be acquired after having
+ * acquired mm locks of an unprivileged domain.
+ *
+ * This is required in order to use some hypercalls from a paging domain that
+ * take locks of a subject domain and then attempt to copy data to/from the
+ * caller domain.
+ */
+static inline int _lock_level(const struct domain *d, int l)
+{
+    ASSERT(l <= MM_LOCK_ORDER_MAX);
+
+    return l + (d && is_control_domain(d) ? MM_LOCK_ORDER_MAX : 0);
+}
+
 /*
  * If you see this crash, the numbers printed are order levels defined
  * in this file.
  */
-static inline void _check_lock_level(int l)
+static inline void _check_lock_level(const struct domain *d, int l)
 {
-    if ( unlikely(_get_lock_level() > l) )
+    int lvl = _lock_level(d, l);
+
+    if ( unlikely(_get_lock_level() > lvl) )
     {
-        printk("mm locking order violation: %i > %i\n", _get_lock_level(), l);
+        printk("mm locking order violation: %i > %i\n", _get_lock_level(), 
lvl);
         BUG();
     }
 }
@@ -68,10 +88,11 @@ static inline void _set_lock_level(int l)
     this_cpu(mm_lock_level) = l;
 }
 
-static inline void _mm_lock(mm_lock_t *l, const char *func, int level, int rec)
+static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
+                            const char *func, int level, int rec)
 {
     if ( !((mm_locked_by_me(l)) && rec) )
-        _check_lock_level(level);
+        _check_lock_level(d, level);
     spin_lock_recursive(&l->lock);
     if ( l->lock.recurse_cnt == 1 )
     {
@@ -80,16 +101,17 @@ static inline void _mm_lock(mm_lock_t *l, const char *func, int level, int rec)
     }
     else if ( (unlikely(!rec)) )
         panic("mm lock already held by %s\n", l->locker_function);
-    _set_lock_level(level);
+    _set_lock_level(_lock_level(d, level));
 }
 
-static inline void _mm_enforce_order_lock_pre(int level)
+static inline void _mm_enforce_order_lock_pre(const struct domain *d, int level)
 {
-    _check_lock_level(level);
+    _check_lock_level(d, level);
 }
 
-static inline void _mm_enforce_order_lock_post(int level, int *unlock_level,
-                                                unsigned short *recurse_count)
+static inline void _mm_enforce_order_lock_post(const struct domain *d, int level,
+                                               int *unlock_level,
+                                               unsigned short *recurse_count)
 {
     if ( recurse_count )
     {
@@ -100,7 +122,7 @@ static inline void _mm_enforce_order_lock_post(int level, int *unlock_level,
     } else {
         *unlock_level = _get_lock_level();
     }
-    _set_lock_level(level);
+    _set_lock_level(_lock_level(d, level));
 }
 
 
@@ -117,16 +139,17 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l)
     return (l->locker == get_processor_id());
 }
 
-static inline void _mm_write_lock(mm_rwlock_t *l, const char *func, int level)
+static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+                                  const char *func, int level)
 {
     if ( !mm_write_locked_by_me(l) )
     {
-        _check_lock_level(level);
+        _check_lock_level(d, level);
         percpu_write_lock(p2m_percpu_rwlock, &l->lock);
         l->locker = get_processor_id();
         l->locker_function = func;
         l->unlock_level = _get_lock_level();
-        _set_lock_level(level);
+        _set_lock_level(_lock_level(d, level));
     }
     l->recurse_count++;
 }
@@ -141,9 +164,10 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
     percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
 }
 
-static inline void _mm_read_lock(mm_rwlock_t *l, int level)
+static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
+                                 int level)
 {
-    _check_lock_level(level);
+    _check_lock_level(d, level);
     percpu_read_lock(p2m_percpu_rwlock, &l->lock);
     /* There's nowhere to store the per-CPU unlock level so we can't
      * set the lock level. */
@@ -156,28 +180,32 @@ static inline void mm_read_unlock(mm_rwlock_t *l)
 
 /* This wrapper uses the line number to express the locking order below */
 #define declare_mm_lock(name)                                                 \
-    static inline void mm_lock_##name(mm_lock_t *l, const char *func, int rec)\
-    { _mm_lock(l, func, MM_LOCK_ORDER_##name, rec); }
+    static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l,   \
+                                      const char *func, int rec)              \
+    { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
 #define declare_mm_rwlock(name)                                               \
-    static inline void mm_write_lock_##name(mm_rwlock_t *l, const char *func) \
-    { _mm_write_lock(l, func, MM_LOCK_ORDER_##name); }                        \
-    static inline void mm_read_lock_##name(mm_rwlock_t *l)                    \
-    { _mm_read_lock(l, MM_LOCK_ORDER_##name); }
+    static inline void mm_write_lock_##name(const struct domain *d,           \
+                                            mm_rwlock_t *l, const char *func) \
+    { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); }                     \
+    static inline void mm_read_lock_##name(const struct domain *d,            \
+                                           mm_rwlock_t *l)                    \
+    { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
 /* These capture the name of the calling function */
-#define mm_lock(name, l) mm_lock_##name(l, __func__, 0)
-#define mm_lock_recursive(name, l) mm_lock_##name(l, __func__, 1)
-#define mm_write_lock(name, l) mm_write_lock_##name(l, __func__)
-#define mm_read_lock(name, l) mm_read_lock_##name(l)
+#define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
+#define mm_lock_recursive(name, d, l) mm_lock_##name(d, l, __func__, 1)
+#define mm_write_lock(name, d, l) mm_write_lock_##name(d, l, __func__)
+#define mm_read_lock(name, d, l) mm_read_lock_##name(d, l)
 
 /* This wrapper is intended for "external" locks which do not use
  * the mm_lock_t types. Such locks inside the mm code are also subject
  * to ordering constraints. */
-#define declare_mm_order_constraint(name)                                   \
-    static inline void mm_enforce_order_lock_pre_##name(void)               \
-    { _mm_enforce_order_lock_pre(MM_LOCK_ORDER_##name); }                     \
-    static inline void mm_enforce_order_lock_post_##name(                   \
-                        int *unlock_level, unsigned short *recurse_count)   \
-    { _mm_enforce_order_lock_post(MM_LOCK_ORDER_##name, unlock_level, recurse_count); } \
+#define declare_mm_order_constraint(name)                                       \
+    static inline void mm_enforce_order_lock_pre_##name(const struct domain *d) \
+    { _mm_enforce_order_lock_pre(d, MM_LOCK_ORDER_##name); }                    \
+    static inline void mm_enforce_order_lock_post_##name(const struct domain *d,\
+                        int *unlock_level, unsigned short *recurse_count)       \
+    { _mm_enforce_order_lock_post(d, MM_LOCK_ORDER_##name, unlock_level,        \
+                                  recurse_count); }
 
 static inline void mm_unlock(mm_lock_t *l)
 {
@@ -221,7 +249,7 @@ static inline void mm_enforce_order_unlock(int unlock_level,
 
 #define MM_LOCK_ORDER_nestedp2m               8
 declare_mm_lock(nestedp2m)
-#define nestedp2m_lock(d)   mm_lock(nestedp2m, &(d)->arch.nested_p2m_lock)
+#define nestedp2m_lock(d)   mm_lock(nestedp2m, d, &(d)->arch.nested_p2m_lock)
 #define nestedp2m_unlock(d) mm_unlock(&(d)->arch.nested_p2m_lock)
 
 /* P2M lock (per-non-alt-p2m-table)
@@ -260,9 +288,10 @@ declare_mm_rwlock(p2m);
 
 #define MM_LOCK_ORDER_per_page_sharing       24
 declare_mm_order_constraint(per_page_sharing)
-#define page_sharing_mm_pre_lock()   mm_enforce_order_lock_pre_per_page_sharing()
+#define page_sharing_mm_pre_lock() \
+        mm_enforce_order_lock_pre_per_page_sharing(NULL)
 #define page_sharing_mm_post_lock(l, r) \
-        mm_enforce_order_lock_post_per_page_sharing((l), (r))
+        mm_enforce_order_lock_post_per_page_sharing(NULL, (l), (r))
 #define page_sharing_mm_unlock(l, r) mm_enforce_order_unlock((l), (r))
 
 /* Alternate P2M list lock (per-domain)
@@ -275,7 +304,8 @@ declare_mm_order_constraint(per_page_sharing)
 
 #define MM_LOCK_ORDER_altp2mlist             32
 declare_mm_lock(altp2mlist)
-#define altp2m_list_lock(d)   mm_lock(altp2mlist, &(d)->arch.altp2m_list_lock)
+#define altp2m_list_lock(d)   mm_lock(altp2mlist, d, \
+                                      &(d)->arch.altp2m_list_lock)
 #define altp2m_list_unlock(d) mm_unlock(&(d)->arch.altp2m_list_lock)
 
 /* P2M lock (per-altp2m-table)
@@ -294,9 +324,9 @@ declare_mm_rwlock(altp2m);
 static inline void p2m_lock(struct p2m_domain *p)
 {
     if ( p2m_is_altp2m(p) )
-        mm_write_lock(altp2m, &p->lock);
+        mm_write_lock(altp2m, p->domain, &p->lock);
     else
-        mm_write_lock(p2m, &p->lock);
+        mm_write_lock(p2m, p->domain, &p->lock);
     p->defer_flush++;
 }
 
@@ -310,7 +340,7 @@ static inline void p2m_unlock(struct p2m_domain *p)
 
 #define gfn_lock(p,g,o)       p2m_lock(p)
 #define gfn_unlock(p,g,o)     p2m_unlock(p)
-#define p2m_read_lock(p)      mm_read_lock(p2m, &(p)->lock)
+#define p2m_read_lock(p)      mm_read_lock(p2m, (p)->domain, &(p)->lock)
 #define p2m_read_unlock(p)    mm_read_unlock(&(p)->lock)
 #define p2m_locked_by_me(p)   mm_write_locked_by_me(&(p)->lock)
 #define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
@@ -322,7 +352,7 @@ static inline void p2m_unlock(struct p2m_domain *p)
 
 #define MM_LOCK_ORDER_pod                    48
 declare_mm_lock(pod)
-#define pod_lock(p)           mm_lock(pod, &(p)->pod.lock)
+#define pod_lock(p)           mm_lock(pod, (p)->domain, &(p)->pod.lock)
 #define pod_unlock(p)         mm_unlock(&(p)->pod.lock)
 #define pod_locked_by_me(p)   mm_locked_by_me(&(p)->pod.lock)
 
@@ -335,8 +365,9 @@ declare_mm_lock(pod)
 
 #define MM_LOCK_ORDER_page_alloc             56
 declare_mm_order_constraint(page_alloc)
-#define page_alloc_mm_pre_lock()   mm_enforce_order_lock_pre_page_alloc()
-#define page_alloc_mm_post_lock(l) mm_enforce_order_lock_post_page_alloc(&(l), NULL)
+#define page_alloc_mm_pre_lock(d)  mm_enforce_order_lock_pre_page_alloc(d)
+#define page_alloc_mm_post_lock(d, l) \
+        mm_enforce_order_lock_post_page_alloc(d, &(l), NULL)
 #define page_alloc_mm_unlock(l)    mm_enforce_order_unlock((l), NULL)
 
 /* Paging lock (per-domain)
@@ -356,9 +387,9 @@ declare_mm_order_constraint(page_alloc)
 
 #define MM_LOCK_ORDER_paging                 64
 declare_mm_lock(paging)
-#define paging_lock(d)         mm_lock(paging, &(d)->arch.paging.lock)
+#define paging_lock(d)         mm_lock(paging, d, &(d)->arch.paging.lock)
 #define paging_lock_recursive(d) \
-                    mm_lock_recursive(paging, &(d)->arch.paging.lock)
+                    mm_lock_recursive(paging, d, &(d)->arch.paging.lock)
 #define paging_unlock(d)       mm_unlock(&(d)->arch.paging.lock)
 #define paging_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.lock)
 
diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index 4c56cb58c6..4313863066 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -34,9 +34,10 @@
 /* Enforce lock ordering when grabbing the "external" page_alloc lock */
 static inline void lock_page_alloc(struct p2m_domain *p2m)
 {
-    page_alloc_mm_pre_lock();
+    page_alloc_mm_pre_lock(p2m->domain);
     spin_lock(&(p2m->domain->page_alloc_lock));
-    page_alloc_mm_post_lock(p2m->domain->arch.page_alloc_unlock_level);
+    page_alloc_mm_post_lock(p2m->domain,
+                            p2m->domain->arch.page_alloc_unlock_level);
 }
 
 static inline void unlock_page_alloc(struct p2m_domain *p2m)
-- 
2.20.1

