[Xen-changelog] [xen-unstable] [XEN] Avoid taking domain biglock in the page-fault handler.
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxxx
# Node ID 5ec45b46456350f23e0aa4148bd52cbb64f1addf
# Parent  13ea4bea823764599c05a1dea2b7441e0f63456d
[XEN] Avoid taking domain biglock in the page-fault handler.

This avoids a deadlock situation with the shadow_lock.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/x86/traps.c                     |   43 ++++++++++++-------------------
 xen/include/asm-x86/mm.h                 |    6 ++--
 xen/include/asm-x86/page.h               |   12 ++++++++
 xen/include/asm-x86/x86_32/page-2level.h |    3 ++
 xen/include/asm-x86/x86_32/page-3level.h |   11 +++++++
 xen/include/asm-x86/x86_64/page.h        |    3 ++
 6 files changed, 50 insertions(+), 28 deletions(-)

diff -r 13ea4bea8237 -r 5ec45b464563 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/arch/x86/traps.c      Sat Nov 04 19:26:29 2006 +0000
@@ -704,12 +704,6 @@ static int handle_gdt_ldt_mapping_fault(
 static int handle_gdt_ldt_mapping_fault(
     unsigned long offset, struct cpu_user_regs *regs)
 {
-    extern int map_ldt_shadow_page(unsigned int);
-
-    struct vcpu *v = current;
-    struct domain *d = v->domain;
-    int ret;
-
     /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
     unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
     unsigned int vcpu_area   = (offset >> GDT_LDT_VCPU_VA_SHIFT);
@@ -723,18 +717,15 @@ static int handle_gdt_ldt_mapping_fault(
     if ( likely(is_ldt_area) )
     {
         /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
-        LOCK_BIGLOCK(d);
-        ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
-        UNLOCK_BIGLOCK(d);
-
-        if ( unlikely(ret == 0) )
+        if ( unlikely(map_ldt_shadow_page(offset >> PAGE_SHIFT) == 0) )
         {
             /* In hypervisor mode? Leave it to the #PF handler to fix up. */
             if ( !guest_mode(regs) )
                 return 0;
             /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
             propagate_page_fault(
-                v->arch.guest_context.ldt_base + offset, regs->error_code);
+                current->arch.guest_context.ldt_base + offset,
+                regs->error_code);
         }
     }
     else
@@ -787,7 +778,7 @@ static int __spurious_page_fault(
 
 #if CONFIG_PAGING_LEVELS >= 4
     l4t = map_domain_page(mfn);
-    l4e = l4t[l4_table_offset(addr)];
+    l4e = l4e_read_atomic(&l4t[l4_table_offset(addr)]);
     mfn = l4e_get_pfn(l4e);
     unmap_domain_page(l4t);
     if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
@@ -800,7 +791,7 @@ static int __spurious_page_fault(
 #ifdef CONFIG_X86_PAE
     l3t += (cr3 & 0xFE0UL) >> 3;
 #endif
-    l3e = l3t[l3_table_offset(addr)];
+    l3e = l3e_read_atomic(&l3t[l3_table_offset(addr)]);
     mfn = l3e_get_pfn(l3e);
     unmap_domain_page(l3t);
 #ifdef CONFIG_X86_PAE
@@ -814,7 +805,7 @@ static int __spurious_page_fault(
 #endif
 
     l2t = map_domain_page(mfn);
-    l2e = l2t[l2_table_offset(addr)];
+    l2e = l2e_read_atomic(&l2t[l2_table_offset(addr)]);
     mfn = l2e_get_pfn(l2e);
     unmap_domain_page(l2t);
     if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
@@ -827,7 +818,7 @@ static int __spurious_page_fault(
     }
 
     l1t = map_domain_page(mfn);
-    l1e = l1t[l1_table_offset(addr)];
+    l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
    mfn = l1e_get_pfn(l1e);
     unmap_domain_page(l1t);
     if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
@@ -856,12 +847,16 @@ static int spurious_page_fault(
 static int spurious_page_fault(
     unsigned long addr, struct cpu_user_regs *regs)
 {
-    struct domain *d = current->domain;
-    int is_spurious;
-
-    LOCK_BIGLOCK(d);
+    unsigned long flags;
+    int is_spurious;
+
+    /*
+     * Disabling interrupts prevents TLB flushing, and hence prevents
+     * page tables from becoming invalid under our feet during the walk.
+     */
+    local_irq_save(flags);
     is_spurious = __spurious_page_fault(addr, regs);
-    UNLOCK_BIGLOCK(d);
+    local_irq_restore(flags);
 
     return is_spurious;
 }
@@ -878,11 +873,7 @@ static int fixup_page_fault(unsigned lon
         if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
             return handle_gdt_ldt_mapping_fault(
                 addr - GDT_LDT_VIRT_START, regs);
-        /*
-         * Do not propagate spurious faults in the hypervisor area to the
-         * guest. It cannot fix them up.
-         */
-        return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
+        return 0;
     }
 
     if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/mm.h  Sat Nov 04 19:26:29 2006 +0000
@@ -179,8 +179,8 @@ void init_frametable(void);
 int  alloc_page_type(struct page_info *page, unsigned long type);
 void free_page_type(struct page_info *page, unsigned long type);
 
-extern void invalidate_shadow_ldt(struct vcpu *d);
-extern int _shadow_mode_refcounts(struct domain *d);
+void invalidate_shadow_ldt(struct vcpu *d);
+int _shadow_mode_refcounts(struct domain *d);
 
 static inline void put_page(struct page_info *page)
 {
@@ -385,4 +385,6 @@ int steal_page(
 int steal_page(
     struct domain *d, struct page_info *page, unsigned int memflags);
 
+int map_ldt_shadow_page(unsigned int);
+
 #endif /* __ASM_X86_MM_H__ */
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/page.h        Sat Nov 04 19:26:29 2006 +0000
@@ -24,6 +24,18 @@
 #elif defined(__x86_64__)
 # include <asm/x86_64/page.h>
 #endif
+
+/* Read a pte atomically from memory. */
+#define l1e_read_atomic(l1ep) l1e_from_intpte(pte_read_atomic(l1ep))
+#define l2e_read_atomic(l2ep) l2e_from_intpte(pte_read_atomic(l2ep))
+#define l3e_read_atomic(l3ep) l3e_from_intpte(pte_read_atomic(l3ep))
+#define l4e_read_atomic(l4ep) l4e_from_intpte(pte_read_atomic(l4ep))
+
+/* Write a pte atomically to memory. */
+#define l1e_write_atomic(l1ep, l1e) pte_write_atomic(l1ep, l1e_get_intpte(l1e))
+#define l2e_write_atomic(l2ep, l2e) pte_write_atomic(l2ep, l2e_get_intpte(l2e))
+#define l3e_write_atomic(l3ep, l3e) pte_write_atomic(l3ep, l3e_get_intpte(l3e))
+#define l4e_write_atomic(l4ep, l4e) pte_write_atomic(l4ep, l4e_get_intpte(l4e))
 
 /* Get direct integer representation of a pte's contents (intpte_t). */
 #define l1e_get_intpte(x)          ((x).l1)
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/x86_32/page-2level.h
--- a/xen/include/asm-x86/x86_32/page-2level.h  Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/x86_32/page-2level.h  Sat Nov 04 19:26:29 2006 +0000
@@ -28,6 +28,9 @@ typedef l2_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
+#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
+#define pte_write_atomic(ptep, pte) (*(intpte_t *)(ptep) = (pte))
+
 /* root table */
 #define root_get_pfn              l2e_get_pfn
 #define root_get_flags            l2e_get_flags
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h  Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/x86_32/page-3level.h  Sat Nov 04 19:26:29 2006 +0000
@@ -38,6 +38,17 @@ typedef l3_pgentry_t root_pgentry_t;
 
 #endif /* !__ASSEMBLY__ */
 
+#define pte_read_atomic(ptep) ({                                            \
+    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
+    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, __pte)) != __pte ) \
+        __pte = __npte;                                                     \
+    __pte; })
+#define pte_write_atomic(ptep, pte) do {                                    \
+    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
+    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, (pte))) != __pte ) \
+        __pte = __npte;                                                     \
+} while ( 0 )
+
 /* root table */
 #define root_get_pfn              l3e_get_pfn
 #define root_get_flags            l3e_get_flags
diff -r 13ea4bea8237 -r 5ec45b464563 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Fri Nov 03 16:51:28 2006 +0000
+++ b/xen/include/asm-x86/x86_64/page.h Sat Nov 04 19:26:29 2006 +0000
@@ -40,6 +40,9 @@ typedef l4_pgentry_t root_pgentry_t;
 typedef l4_pgentry_t root_pgentry_t;
 #endif /* !__ASSEMBLY__ */
 
+#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
+#define pte_write_atomic(ptep, pte) (*(intpte_t *)(ptep) = (pte))
+
 /* Given a virtual address, get an entry offset into a linear page table. */
 #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT)
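For background, the deadlock the commit message alludes to is presumably the classic AB-BA lock-ordering problem: if the page-fault path acquires the domain biglock and then needs the shadow_lock, while another path holds the shadow_lock and then needs the biglock, each side waits forever for the lock the other holds. Below is a hypothetical user-space illustration of that pattern using pthreads; the mutex names merely echo the commit message, and none of this is Xen code.

    #include <pthread.h>
    #include <unistd.h>

    /*
     * Hypothetical illustration of an AB-BA deadlock -- NOT Xen code.
     * One thread takes the locks in the order A, B; the other in the
     * order B, A.  Once each holds its first lock, neither can make
     * progress.
     */
    static pthread_mutex_t biglock     = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t shadow_lock = PTHREAD_MUTEX_INITIALIZER;

    static void *fault_path(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&biglock);        /* A, then B */
        usleep(1000);                        /* widen the race window */
        pthread_mutex_lock(&shadow_lock);
        pthread_mutex_unlock(&shadow_lock);
        pthread_mutex_unlock(&biglock);
        return NULL;
    }

    static void *shadow_path(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&shadow_lock);    /* B, then A: deadlocks */
        usleep(1000);
        pthread_mutex_lock(&biglock);
        pthread_mutex_unlock(&biglock);
        pthread_mutex_unlock(&shadow_lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t1, t2;
        pthread_create(&t1, NULL, fault_path, NULL);
        pthread_create(&t2, NULL, shadow_path, NULL);
        pthread_join(t1, NULL);      /* with the sleeps, this hangs */
        pthread_join(t2, NULL);
        return 0;
    }

Removing the biglock acquisition from the fault path, as this patch does, breaks the cycle: the fault path no longer holds A while waiting for B.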
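In spurious_page_fault() the biglock is replaced with local_irq_save()/local_irq_restore(), and the patch's own comment carries the reasoning: with interrupts disabled, this CPU cannot service TLB-flush requests, so the page tables being walked cannot become invalid mid-walk. As a loose user-space analogy (entirely hypothetical, not Xen code), blocking a signal defers an asynchronous handler in much the same way that disabling interrupts defers a flush IPI:

    #include <signal.h>
    #include <stdio.h>

    /*
     * Loose analogy -- NOT Xen code.  Blocking SIGUSR1 plays the role
     * of local_irq_save(): an asynchronous notification (here a
     * signal, in Xen a TLB-flush IPI) cannot be delivered until we
     * unblock it, so whatever we inspect inside the critical section
     * cannot be invalidated under our feet.
     */
    int main(void)
    {
        sigset_t block, saved;

        sigemptyset(&block);
        sigaddset(&block, SIGUSR1);

        sigprocmask(SIG_BLOCK, &block, &saved);  /* ~ local_irq_save()    */
        puts("walking: asynchronous 'flush' deferred");
        sigprocmask(SIG_SETMASK, &saved, NULL);  /* ~ local_irq_restore() */

        return 0;
    }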
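The per-format pte_read_atomic()/pte_write_atomic() definitions exist because of PAE (x86_32 with 3-level paging): there a PTE is 64 bits wide but the CPU's word is 32 bits, so a plain load or store can tear and return a mix of two different PTEs. On 2-level x86_32 and on x86_64 a PTE fits in one machine word, so plain accesses suffice. The PAE variant loops on cmpxchg, exploiting the fact that cmpxchg8b reads and conditionally writes all 64 bits as one atomic operation. Here is a minimal user-space sketch of the read side under those assumptions, with GCC's __sync builtin standing in for Xen's cmpxchg(); the function and variable names are illustrative, not Xen's.

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Illustrative sketch, not the Xen macro itself: read a 64-bit PTE
     * atomically on a CPU whose plain loads are only 32 bits wide.
     * A compare-and-swap of a value with itself never modifies memory,
     * but it accesses all 64 bits in one atomic operation, so the
     * value it returns can never be a torn mix of two different PTEs.
     */
    static uint64_t pte_read_atomic_sketch(uint64_t *ptep)
    {
        uint64_t old = *ptep, cur;  /* this plain read may be torn... */

        /* ...so retry until a CAS confirms a stable 64-bit value. */
        while ( (cur = __sync_val_compare_and_swap(ptep, old, old)) != old )
            old = cur;

        return old;
    }

    int main(void)
    {
        uint64_t pte = 0x8000000012345067ULL;  /* arbitrary example bits */
        printf("pte = 0x%llx\n",
               (unsigned long long)pte_read_atomic_sketch(&pte));
        return 0;
    }

The write side in page-3level.h is the same loop with the new value substituted as the cmpxchg replacement, retrying until the entry is swapped in whole.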