
[Xen-changelog] [IA64] page ref counter



# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID c644eb4049abedac4bbe83c0497b592b1f4bb531
# Parent  405f0f847c0f33381c69a04a168f34150a2bca7d
[IA64] page ref counter

Signed-off-by: Akio Takebe <takebe_akio@xxxxxxxxxxxxxx>
Signed-off-by: Masaki Kanno <kanno.masaki@xxxxxxxxxxxxxx>
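
In short: this brings x86-style page type reference counting to IA64:
real get_page_type()/put_page_type() implementations adapted from
arch/x86/mm.c, a reorganized struct page_info, and PGT_va_* backpointer
bits packed into type_info. Both functions update type_info with a
lock-free cmpxchg retry loop rather than a lock. A minimal standalone
sketch of that loop (C11 atomics; every name below is an illustrative
stand-in, not the Xen code itself):

#include <stdatomic.h>
#include <stdio.h>

#define PGT_count_mask ((1U << 16) - 1)  /* low 16 bits: type use count */

static _Atomic unsigned int type_info;   /* stand-in for page->u.inuse.type_info */

static int get_type_ref(void)
{
    unsigned int x, nx, y = atomic_load(&type_info);

    do {
        x  = y;
        nx = x + 1;
        if ((nx & PGT_count_mask) == 0)  /* the 16-bit count would overflow */
            return 0;
        /* on CAS failure, compare_exchange reloads the current value into y */
    } while (!atomic_compare_exchange_weak(&type_info, &y, nx));

    return 1;
}

int main(void)
{
    get_type_ref();
    printf("count = %u\n", atomic_load(&type_info) & PGT_count_mask);
    return 0;
}

The cmpxchg either publishes the new word or hands back the value that
beat it there, so the loop simply recomputes from fresh state instead of
taking a lock.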

diff -r 405f0f847c0f -r c644eb4049ab xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c       Thu Mar  9 23:24:31 2006
+++ b/xen/arch/ia64/xen/xenmisc.c       Fri Mar 10 15:23:39 2006
@@ -147,12 +147,17 @@
     //memset(percpu_info, 0, sizeof(percpu_info));
 }
 
-#if 0
-void free_page_type(struct page_info *page, unsigned int type)
-{
-       dummy();
-}
-#endif
+void free_page_type(struct page_info *page, u32 type)
+{
+//     dummy();
+       return;
+}
+
+int alloc_page_type(struct page_info *page, u32 type)
+{
+//     dummy();
+       return 1;
+}
 
 ///////////////////////////////
 //// misc memory stuff
@@ -415,3 +420,203 @@
        }
        else printk("sync_split_caches ignored for CPU with no split cache\n");
 }
+
+///////////////////////////////
+// from arch/x86/mm.c
+///////////////////////////////
+
+#ifdef VERBOSE
+#define MEM_LOG(_f, _a...)                           \
+  printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
+         current->domain->domain_id , __LINE__ , ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+void cleanup_writable_pagetable(struct domain *d)
+{
+  return;
+}
+
+void put_page_type(struct page_info *page)
+{
+    u32 nx, x, y = page->u.inuse.type_info;
+
+ again:
+    do {
+        x  = y;
+        nx = x - 1;
+
+        ASSERT((x & PGT_count_mask) != 0);
+
+        /*
+         * The page should always be validated while a reference is held. The 
+         * exception is during domain destruction, when we forcibly invalidate 
+         * page-table pages if we detect a referential loop.
+         * See domain.c:relinquish_list().
+         */
+        ASSERT((x & PGT_validated) || 
+               test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
+
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            /* Record TLB information for flush later. Races are harmless. */
+            page->tlbflush_timestamp = tlbflush_current_time();
+            
+            if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
+                 likely(nx & PGT_validated) )
+            {
+                /*
+                 * Page-table pages must be unvalidated when count is zero. The
+                 * 'free' is safe because the refcnt is non-zero and validated
+                 * bit is clear => other ops will spin or fail.
+                 */
+                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, 
+                                           x & ~PGT_validated)) != x) )
+                    goto again;
+                /* We cleared the 'valid bit' so we do the clean up. */
+                free_page_type(page, x);
+                /* Carry on, but with the 'valid bit' now clear. */
+                x  &= ~PGT_validated;
+                nx &= ~PGT_validated;
+            }
+        }
+        else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) == 
+                            (PGT_pinned | 1)) &&
+                           ((nx & PGT_type_mask) != PGT_writable_page)) )
+        {
+            /* Page is now only pinned. Make the back pointer mutable again. */
+            nx |= PGT_va_mutable;
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+}
+
+
+int get_page_type(struct page_info *page, u32 type)
+{
+    u32 nx, x, y = page->u.inuse.type_info;
+
+ again:
+    do {
+        x  = y;
+        nx = x + 1;
+        if ( unlikely((nx & PGT_count_mask) == 0) )
+        {
+            MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
+            return 0;
+        }
+        else if ( unlikely((x & PGT_count_mask) == 0) )
+        {
+            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
+            {
+                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
+                {
+                    /*
+                     * On type change we check to flush stale TLB
+                     * entries. This may be unnecessary (e.g., page
+                     * was GDT/LDT) but those circumstances should be
+                     * very rare.
+                     */
+                    cpumask_t mask =
+                        page_get_owner(page)->domain_dirty_cpumask;
+                    tlbflush_filter(mask, page->tlbflush_timestamp);
+
+                    if ( unlikely(!cpus_empty(mask)) )
+                    {
+                        perfc_incrc(need_flush_tlb_flush);
+                        flush_tlb_mask(mask);
+                    }
+                }
+
+                /* We lose existing type, back pointer, and validity. */
+                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
+                nx |= type;
+
+                /* No special validation needed for writable pages. */
+                /* Page tables and GDT/LDT need to be scanned for validity. */
+                if ( type == PGT_writable_page )
+                    nx |= PGT_validated;
+            }
+        }
+        else
+        {
+            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+            {
+                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+                {
+                    if ( current->domain == page_get_owner(page) )
+                    {
+                        /*
+                         * This ensures functions like set_gdt() see up-to-date
+                         * type info without needing to clean up writable p.t.
+                         * state on the fast path.
+                         */
+                        LOCK_BIGLOCK(current->domain);
+                        cleanup_writable_pagetable(current->domain);
+                        y = page->u.inuse.type_info;
+                        UNLOCK_BIGLOCK(current->domain);
+                        /* Can we make progress now? */
+                        if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
+                             ((y & PGT_count_mask) == 0) )
+                            goto again;
+                    }
+                    if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
+                         ((type & PGT_type_mask) != PGT_l1_page_table) )
+                        MEM_LOG("Bad type (saw %" PRtype_info
+                                " != exp %" PRtype_info ") "
+                                "for mfn %lx (pfn %lx)",
+                                x, type, page_to_mfn(page),
+                                get_gpfn_from_mfn(page_to_mfn(page)));
+                    return 0;
+                }
+                else if ( (x & PGT_va_mask) == PGT_va_mutable )
+                {
+                    /* The va backpointer is mutable, hence we update it. */
+                    nx &= ~PGT_va_mask;
+                    nx |= type; /* we know the actual type is correct */
+                }
+                else if ( ((type & PGT_va_mask) != PGT_va_mutable) &&
+                          ((type & PGT_va_mask) != (x & PGT_va_mask)) )
+                {
+#ifdef CONFIG_X86_PAE
+                    /* We use backptr as extra typing. Cannot be unknown. */
+                    if ( (type & PGT_type_mask) == PGT_l2_page_table )
+                        return 0;
+#endif
+                    /* This table is possibly mapped at multiple locations. */
+                    nx &= ~PGT_va_mask;
+                    nx |= PGT_va_unknown;
+                }
+            }
+            if ( unlikely(!(x & PGT_validated)) )
+            {
+                /* Someone else is updating validation of this page. Wait... */
+                while ( (y = page->u.inuse.type_info) == x )
+                    cpu_relax();
+                goto again;
+            }
+        }
+    }
+    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
+
+    if ( unlikely(!(nx & PGT_validated)) )
+    {
+        /* Try to validate page type; drop the new reference on failure. */
+        if ( unlikely(!alloc_page_type(page, type)) )
+        {
+            MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
+                    PRtype_info ": caf=%08x taf=%" PRtype_info,
+                    page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
+                    type, page->count_info, page->u.inuse.type_info);
+            /* No one else can get a reference. We hold the only ref. */
+            page->u.inuse.type_info = 0;
+            return 0;
+        }
+
+        /* No one else is updating simultaneously. */
+        __set_bit(_PGT_validated, &page->u.inuse.type_info);
+    }
+
+    return 1;
+}
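
For context, callers pair the general reference count with the type
count, as on x86. A hypothetical caller (the helper name is invented;
get_page/put_page, get_page_type/put_page_type and PGT_writable_page
are the ones this patch defines):

/* Illustration only, not part of the patch. */
static int illustrative_get_writable(struct page_info *page, struct domain *d)
{
    if (unlikely(!get_page(page, d)))          /* general ref + owner check */
        return 0;
    if (unlikely(!get_page_type(page, PGT_writable_page))) {
        put_page(page);                        /* undo the general ref */
        return 0;
    }
    /* ... use the frame, then release with put_page_and_type(page) ... */
    return 1;
}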
diff -r 405f0f847c0f -r c644eb4049ab xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Thu Mar  9 23:24:31 2006
+++ b/xen/include/asm-ia64/mm.h Fri Mar 10 15:23:39 2006
@@ -41,32 +41,33 @@
     /* Each frame can be threaded onto a doubly-linked list. */
     struct list_head list;
 
+    /* Reference count and various PGC_xxx flags and fields. */
+    u32 count_info;
+
+    /* Context-dependent fields follow... */
+    union {
+
+        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
+        struct {
+            /* Owner of this page (NULL if page is anonymous). */
+            u32 _domain; /* pickled format */
+            /* Type reference count and various PGT_xxx flags and fields. */
+            unsigned long type_info;
+        } __attribute__ ((packed)) inuse;
+
+        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
+        struct {
+            /* Order-size of the free chunk this page is the head of. */
+            u32 order;
+            /* Mask of possibly-tainted TLBs. */
+            cpumask_t cpumask;
+        } __attribute__ ((packed)) free;
+
+    } u;
+
     /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
     u32 tlbflush_timestamp;
 
-    /* Reference count and various PGC_xxx flags and fields. */
-    u32 count_info;
-
-    /* Context-dependent fields follow... */
-    union {
-
-        /* Page is in use by a domain. */
-        struct {
-            /* Owner of this page. */
-            u32        _domain;
-            /* Type reference count and various PGT_xxx flags and fields. */
-            u32 type_info;
-        } inuse;
-
-        /* Page is on a free list. */
-        struct {
-            /* Mask of possibly-tainted TLBs. */
-            cpumask_t cpumask;
-            /* Order-size of the free chunk this page is the head of. */
-            u8 order;
-        } free;
-
-    } u;
 #if 0
 // following added for Linux compiling
     page_flags_t flags;
@@ -94,8 +95,15 @@
 #define _PGT_pinned         27
 #define PGT_pinned          (1U<<_PGT_pinned)
 
-/* 27-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<27)-1)
+/* The 11 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift        16
+#define PGT_va_mask         (((1U<<11)-1)<<PGT_va_shift)
+/* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable      (((1U<<11)-1)<<PGT_va_shift)
+/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown      (((1U<<11)-2)<<PGT_va_shift)
+/* 16-bit count of uses of this frame as its current type. */
+#define PGT_count_mask      ((1U<<16)-1)
 
 /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
@@ -138,7 +146,6 @@
 
 static inline void put_page(struct page_info *page)
 {
-#ifdef VALIDATE_VT     // doesn't work with non-VTI in grant tables yet
     u32 nx, x, y = page->count_info;
 
     do {
@@ -149,14 +156,12 @@
 
     if (unlikely((nx & PGC_count_mask) == 0))
        free_domheap_page(page);
-#endif
 }
 
 /* count_info and ownership are checked atomically. */
 static inline int get_page(struct page_info *page,
                            struct domain *domain)
 {
-#ifdef VALIDATE_VT
     u64 x, nx, y = *((u64*)&page->count_info);
     u32 _domain = pickle_domptr(domain);
 
@@ -172,14 +177,13 @@
            return 0;
        }
     }
-    while(unlikely(y = cmpxchg(&page->count_info, x, nx)) != x);
-#endif
+    while(unlikely((y = cmpxchg((u64*)&page->count_info, x, nx)) != x));
     return 1;
 }
 
-/* No type info now */
-#define put_page_type(page)
-#define get_page_type(page, type) 1
+extern void put_page_type(struct page_info *page);
+extern int get_page_type(struct page_info *page, u32 type);
+
 static inline void put_page_and_type(struct page_info *page)
 {
     put_page_type(page);
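
The new type_info layout packs a 16-bit use count (bits 0-15), an 11-bit
va backpointer (bits 16-26), and flag bits such as PGT_pinned (bit 27)
into one word. A standalone sketch that decodes a value using only the
masks defined in this patch (the sample value is made up for
illustration):

#include <stdio.h>

#define PGT_va_shift    16
#define PGT_va_mask     (((1U << 11) - 1) << PGT_va_shift)
#define PGT_va_mutable  (((1U << 11) - 1) << PGT_va_shift)
#define PGT_va_unknown  (((1U << 11) - 2) << PGT_va_shift)
#define PGT_count_mask  ((1U << 16) - 1)
#define _PGT_pinned     27
#define PGT_pinned      (1U << _PGT_pinned)

int main(void)
{
    unsigned int ti = PGT_pinned | (3U << PGT_va_shift) | 5;  /* sample value */
    unsigned int va = ti & PGT_va_mask;

    printf("type count : %u\n", ti & PGT_count_mask);
    printf("pinned     : %s\n", (ti & PGT_pinned) ? "yes" : "no");
    if (va == PGT_va_mutable)
        printf("va backptr : mutable\n");
    else if (va == PGT_va_unknown)
        printf("va backptr : unknown\n");
    else
        printf("va backptr : %u\n", va >> PGT_va_shift);
    return 0;
}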
