[Xen-changelog] [linux-2.6.18-xen] linux: allow use of split page table locks
# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1191577746 -3600
# Node ID 6e26ffc60647bd7454d0a066a8ab63ef7f0123af
# Parent ac1f33f633ba158a5427f24dbc31a1ee573a02b7
linux: allow use of split page table locks

This fixes the race condition previously experienced between
(un)pinning and vmscan.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 arch/i386/mm/pgtable-xen.c    |   66 +++++++++++++++++++++++++++++++++++++++---
 arch/x86_64/mm/pageattr-xen.c |   66 +++++++++++++++++++++++++++++++++++++++---
 mm/Kconfig                    |    3 -
 3 files changed, 124 insertions(+), 11 deletions(-)

diff -r ac1f33f633ba -r 6e26ffc60647 arch/i386/mm/pgtable-xen.c
--- a/arch/i386/mm/pgtable-xen.c	Wed Oct 03 15:02:54 2007 +0100
+++ b/arch/i386/mm/pgtable-xen.c	Fri Oct 05 10:49:06 2007 +0100
@@ -494,6 +494,64 @@ void make_pages_writable(void *va, unsig
 	}
 }
 
+static void _pin_lock(struct mm_struct *mm, int lock) {
+	if (lock)
+		spin_lock(&mm->page_table_lock);
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+	/* While mm->page_table_lock protects us against insertions and
+	 * removals of higher level page table pages, it doesn't protect
+	 * against updates of pte-s. Such updates, however, require the
+	 * pte pages to be in consistent state (unpinned+writable or
+	 * pinned+readonly). The pinning and attribute changes, however,
+	 * cannot be done atomically, which is why such updates must be
+	 * prevented from happening concurrently.
+	 * Note that no pte lock can ever elsewhere be acquired nesting
+	 * with an already acquired one in the same mm, or with the mm's
+	 * page_table_lock already acquired, as that would break in the
+	 * non-split case (where all these are actually resolving to the
+	 * one page_table_lock). Thus acquiring all of them here is not
+	 * going to result in deadlocks, and the order of acquires
+	 * doesn't matter.
+	 */
+	{
+		pgd_t *pgd = mm->pgd;
+		unsigned g;
+
+		for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
+			pud_t *pud;
+			unsigned u;
+
+			if (pgd_none(*pgd))
+				continue;
+			pud = pud_offset(pgd, 0);
+			for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+				pmd_t *pmd;
+				unsigned m;
+
+				if (pud_none(*pud))
+					continue;
+				pmd = pmd_offset(pud, 0);
+				for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+					spinlock_t *ptl;
+
+					if (pmd_none(*pmd))
+						continue;
+					ptl = pte_lockptr(0, pmd);
+					if (lock)
+						spin_lock(ptl);
+					else
+						spin_unlock(ptl);
+				}
+			}
+		}
+	}
+#endif
+	if (!lock)
+		spin_unlock(&mm->page_table_lock);
+}
+#define pin_lock(mm) _pin_lock(mm, 1)
+#define pin_unlock(mm) _pin_lock(mm, 0)
+
 static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
 {
 	unsigned long pfn = page_to_pfn(page);
@@ -576,18 +634,18 @@ void mm_pin(struct mm_struct *mm)
 {
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
-	spin_lock(&mm->page_table_lock);
+	pin_lock(mm);
 	__pgd_pin(mm->pgd);
-	spin_unlock(&mm->page_table_lock);
+	pin_unlock(mm);
 }
 
 void mm_unpin(struct mm_struct *mm)
 {
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
-	spin_lock(&mm->page_table_lock);
+	pin_lock(mm);
 	__pgd_unpin(mm->pgd);
-	spin_unlock(&mm->page_table_lock);
+	pin_unlock(mm);
 }
 
 void mm_pin_all(void)
diff -r ac1f33f633ba -r 6e26ffc60647 arch/x86_64/mm/pageattr-xen.c
--- a/arch/x86_64/mm/pageattr-xen.c	Wed Oct 03 15:02:54 2007 +0100
+++ b/arch/x86_64/mm/pageattr-xen.c	Fri Oct 05 10:49:06 2007 +0100
@@ -19,6 +19,64 @@
 LIST_HEAD(mm_unpinned);
 DEFINE_SPINLOCK(mm_unpinned_lock);
+
+static void _pin_lock(struct mm_struct *mm, int lock) {
+	if (lock)
+		spin_lock(&mm->page_table_lock);
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+	/* While mm->page_table_lock protects us against insertions and
+	 * removals of higher level page table pages, it doesn't protect
+	 * against updates of pte-s. Such updates, however, require the
+	 * pte pages to be in consistent state (unpinned+writable or
+	 * pinned+readonly). The pinning and attribute changes, however,
+	 * cannot be done atomically, which is why such updates must be
+	 * prevented from happening concurrently.
+	 * Note that no pte lock can ever elsewhere be acquired nesting
+	 * with an already acquired one in the same mm, or with the mm's
+	 * page_table_lock already acquired, as that would break in the
+	 * non-split case (where all these are actually resolving to the
+	 * one page_table_lock). Thus acquiring all of them here is not
+	 * going to result in deadlocks, and the order of acquires
+	 * doesn't matter.
+	 */
+	{
+		pgd_t *pgd = mm->pgd;
+		unsigned g;
+
+		for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
+			pud_t *pud;
+			unsigned u;
+
+			if (pgd_none(*pgd))
+				continue;
+			pud = pud_offset(pgd, 0);
+			for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+				pmd_t *pmd;
+				unsigned m;
+
+				if (pud_none(*pud))
+					continue;
+				pmd = pmd_offset(pud, 0);
+				for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+					spinlock_t *ptl;
+
+					if (pmd_none(*pmd))
+						continue;
+					ptl = pte_lockptr(0, pmd);
+					if (lock)
+						spin_lock(ptl);
+					else
+						spin_unlock(ptl);
+				}
+			}
+		}
+	}
+#endif
+	if (!lock)
+		spin_unlock(&mm->page_table_lock);
+}
+#define pin_lock(mm) _pin_lock(mm, 1)
+#define pin_unlock(mm) _pin_lock(mm, 0)
 
 static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
 {
@@ -76,7 +134,7 @@ void mm_pin(struct mm_struct *mm)
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
 
-	spin_lock(&mm->page_table_lock);
+	pin_lock(mm);
 
 	mm_walk(mm, PAGE_KERNEL_RO);
 	if (HYPERVISOR_update_va_mapping(
@@ -97,7 +155,7 @@
 	list_del(&mm->context.unpinned);
 	spin_unlock(&mm_unpinned_lock);
 
-	spin_unlock(&mm->page_table_lock);
+	pin_unlock(mm);
 }
 
 void mm_unpin(struct mm_struct *mm)
@@ -105,7 +163,7 @@
 	if (xen_feature(XENFEAT_writable_page_tables))
 		return;
 
-	spin_lock(&mm->page_table_lock);
+	pin_lock(mm);
 
 	xen_pgd_unpin(__pa(mm->pgd));
 	xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
@@ -125,7 +183,7 @@
 	list_add(&mm->context.unpinned, &mm_unpinned);
 	spin_unlock(&mm_unpinned_lock);
 
-	spin_unlock(&mm->page_table_lock);
+	pin_unlock(mm);
 }
 
 void mm_pin_all(void)
diff -r ac1f33f633ba -r 6e26ffc60647 mm/Kconfig
--- a/mm/Kconfig	Wed Oct 03 15:02:54 2007 +0100
+++ b/mm/Kconfig	Fri Oct 05 10:49:06 2007 +0100
@@ -127,14 +127,11 @@ comment "Memory hotplug is currently inc
 # Default to 4 for wider testing, though 8 might be more appropriate.
 # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
 # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
-# XEN on x86 architecture uses the mapping field on pagetable pages to store a
-# pointer to the destructor. This conflicts with pte_lock_deinit().
 #
 config SPLIT_PTLOCK_CPUS
 	int
 	default "4096" if ARM && !CPU_CACHE_VIPT
 	default "4096" if PARISC && !PA20
-	default "4096" if X86_XEN || X86_64_XEN
 	default "4"
 
 #
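The race described in the patch description, and the reason _pin_lock() takes
every pte lock, can be illustrated with a small userspace model. This is a
hypothetical sketch, not kernel code: the names ptpage, pin_lock_all,
pin_thread and pte_updater, and the mapping of the kernel locks onto pthreads
mutexes, are all invented for illustration. One thread flips each "pte page"
between unpinned+writable and pinned+readonly in two non-atomic steps, as
mm_pin()/mm_unpin() do; the other, standing in for vmscan, examines a page
while holding only that page's split pte lock. Because the pinning thread
acquires every pte lock around the two-step flip, the updater can never
observe the intermediate, inconsistent state:

/*
 * Hypothetical userspace model of the race fixed above (not kernel code;
 * all names are invented for illustration).
 * Build with: cc -std=c99 -O2 -pthread race-model.c -o race-model
 */
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NPTPAGES   4            /* pretend pte pages */
#define ITERATIONS 100000

struct ptpage {
	pthread_mutex_t ptl;    /* models the split pte lock */
	bool pinned;            /* pinned+readonly ... */
	bool writable;          /* ... vs unpinned+writable */
};

static struct ptpage pt[NPTPAGES];
static pthread_mutex_t page_table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Counterpart of _pin_lock(mm, lock): page_table_lock plus every pte lock. */
static void pin_lock_all(int lock)
{
	if (lock)
		pthread_mutex_lock(&page_table_lock);
	for (int i = 0; i < NPTPAGES; i++) {
		if (lock)
			pthread_mutex_lock(&pt[i].ptl);
		else
			pthread_mutex_unlock(&pt[i].ptl);
	}
	if (!lock)
		pthread_mutex_unlock(&page_table_lock);
}

/* Models mm_pin()/mm_unpin(): the two-step state change is not atomic. */
static void *pin_thread(void *unused)
{
	for (int n = 0; n < ITERATIONS; n++) {
		pin_lock_all(1);
		for (int i = 0; i < NPTPAGES; i++) {
			pt[i].writable = pt[i].pinned;  /* step 1: attribute change */
			pt[i].pinned = !pt[i].pinned;   /* step 2: (un)pin */
		}
		pin_lock_all(0);
	}
	return NULL;
}

/* Models a vmscan-style pte update: holds only the per-page pte lock. */
static void *pte_updater(void *unused)
{
	for (int n = 0; n < ITERATIONS; n++) {
		struct ptpage *p = &pt[n % NPTPAGES];

		pthread_mutex_lock(&p->ptl);
		/* A pte update requires a consistent page: exactly one of
		 * unpinned+writable or pinned+readonly. */
		assert(p->pinned != p->writable);
		pthread_mutex_unlock(&p->ptl);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	for (int i = 0; i < NPTPAGES; i++) {
		pthread_mutex_init(&pt[i].ptl, NULL);
		pt[i].pinned = false;
		pt[i].writable = true;  /* start out unpinned+writable */
	}
	pthread_create(&a, NULL, pin_thread, NULL);
	pthread_create(&b, NULL, pte_updater, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	puts("no inconsistent pte page state observed");
	return 0;
}

Removing the pte-lock acquisition from pin_lock_all(), so that only
page_table_lock is taken (the pre-patch behaviour), lets the assertion in
pte_updater() fire: with split page table locks, pte users such as vmscan
hold only the per-page lock and never mm->page_table_lock, so the mm-wide
lock alone cannot shield the two-step pin/unpin transition.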