[Xen-changelog] [xen-3.2-testing] Handle DRHDs with different supported AGAWs.
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1217344395 -3600
# Node ID f830b47149a4ccd7c0b0be0eaa41add653c90c36
# Parent  6de4320d71f9da9228a130e2b02bd855c1c53cf3
Handle DRHDs with different supported AGAWs.

This changeset is back-ported from xen-unstable.

Signed-off-by: Dexuan Cui <dexuan.cui@xxxxxxxxx>

The original description of the changeset is:

vtd: Various cleanups and fixes:
 * Handle DRHDs with different supported AGAWs. To support this we create
   page tables which always have 4 levels, and skip top levels for units
   which support only 2 or 3 levels.
 * Handle systems with mixed DRHD support for cache snooping. We must
   pessimistically CLFLUSH if any DRHD does not support snooping.

Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>

xen-unstable changeset:   17755:ecd266cebcab648132d432899eabaecf8a168508
xen-unstable date:        Fri May 30 15:06:08 2008 +0100
---
 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    |  175 +++++++++++++++---------------
 xen/include/asm-x86/hvm/vmx/intel-iommu.h |    1
 xen/include/asm-x86/iommu.h               |    1
 3 files changed, 95 insertions(+), 82 deletions(-)
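
Before the diff, a brief aside on the headline change. The changeset message
above says domains now always get 4-level page tables, and that units
supporting only 2 or 3 levels skip the top levels. The sketch below is a
standalone illustration in plain C, not Xen code: NR_LEVELS, struct pt_page,
root_for_unit() and unit_levels are invented names. It mimics the walk the
patch adds to domain_context_mapping_one(), which descends from the 4-level
top until the remaining depth matches the unit's nr_pt_levels and programs
the page it lands on as that unit's address root.

/* Standalone sketch of skipping top page-table levels (illustration only). */
#include <stdio.h>

#define NR_LEVELS 4                       /* domain tables always have 4 levels */

struct pt_page {
    struct pt_page *next;                 /* stand-in for the entry we follow */
};

/* Descend from the 4-level top until the remaining depth matches what the
 * unit supports; the page we stop at becomes its context-entry address root. */
static struct pt_page *root_for_unit(struct pt_page *top, int unit_levels)
{
    struct pt_page *root = top;
    int skip = NR_LEVELS - unit_levels;   /* 0, 1 or 2 levels to skip */

    while ( skip-- > 0 )
    {
        if ( root->next == NULL )
            return NULL;                  /* next level not allocated yet */
        root = root->next;
    }
    return root;
}

int main(void)
{
    /* Toy 4-level chain: levels[0] is the top, levels[3] the leaf table. */
    struct pt_page levels[NR_LEVELS];
    int i, n;

    for ( i = 0; i < NR_LEVELS - 1; i++ )
        levels[i].next = &levels[i + 1];
    levels[NR_LEVELS - 1].next = NULL;

    for ( n = 2; n <= NR_LEVELS; n++ )
        printf("unit with %d levels starts at levels[%td]\n",
               n, root_for_unit(&levels[0], n) - &levels[0]);
    return 0;
}

The snooping half of the change needs no sketch: iommu_alloc() sets the
global iommus_incoherent flag when any unit lacks ecap_coherent(), and
__iommu_flush_cache() becomes a no-op unless that flag is set, i.e. the
pessimistic CLFLUSH mentioned above.
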
diff -r 6de4320d71f9 -r f830b47149a4 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c
--- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c   Fri Jul 25 15:04:26 2008 +0100
+++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c   Tue Jul 29 16:13:15 2008 +0100
@@ -79,24 +79,28 @@ static void iommu_domid_release(struct d
     }
 }
 
-unsigned int x86_clflush_size;
-void clflush_cache_range(void *adr, int size)
+static unsigned int x86_clflush_size;
+static int iommus_incoherent;
+static void __iommu_flush_cache(void *addr, int size)
 {
     int i;
+
+    if ( !iommus_incoherent )
+        return;
+
     for ( i = 0; i < size; i += x86_clflush_size )
-        clflush(adr + i);
-}
-
-static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
-{
-    if ( !ecap_coherent(iommu->ecap) )
-        clflush_cache_range(addr, size);
-}
-
-#define iommu_flush_cache_entry(iommu, addr) \
-    __iommu_flush_cache(iommu, addr, 8)
-#define iommu_flush_cache_page(iommu, addr) \
-    __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
+        clflush((char*)addr + i);
+}
+
+void iommu_flush_cache_entry(void *addr)
+{
+    __iommu_flush_cache(addr, 8);
+}
+
+void iommu_flush_cache_page(void *addr)
+{
+    __iommu_flush_cache(addr, PAGE_SIZE_4K);
+}
 
 int nr_iommus;
 /* context entry handling */
@@ -119,11 +123,11 @@ static struct context_entry * device_to_
             return NULL;
         }
         memset((void *) phy_addr, 0, PAGE_SIZE);
-        iommu_flush_cache_page(iommu, (void *)phy_addr);
+        iommu_flush_cache_page((void *)phy_addr);
         phy_addr = virt_to_maddr((void *)phy_addr);
         set_root_value(*root, phy_addr);
         set_root_present(*root);
-        iommu_flush_cache_entry(iommu, root);
+        iommu_flush_cache_entry(root);
     }
     phy_addr = (unsigned long) get_context_addr(*root);
     context = (struct context_entry *)maddr_to_virt(phy_addr);
@@ -157,8 +161,6 @@ static struct page_info *addr_to_dma_pag
 static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
     struct dma_pte *parent, *pte = NULL, *pgd;
     int level = agaw_to_level(hd->agaw);
@@ -166,9 +168,6 @@ static struct page_info *addr_to_dma_pag
     unsigned long flags;
     struct page_info *pg = NULL;
     u64 *vaddr = NULL;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
 
     addr &= (((u64)1) << addr_width) - 1;
     spin_lock_irqsave(&hd->mapping_lock, flags);
@@ -200,7 +199,7 @@ static struct page_info *addr_to_dma_pag
                 return NULL;
             }
             memset(vaddr, 0, PAGE_SIZE);
-            iommu_flush_cache_page(iommu, vaddr);
+            iommu_flush_cache_page(vaddr);
 
             dma_set_pte_addr(*pte, page_to_maddr(pg));
@@ -210,7 +209,7 @@ static struct page_info *addr_to_dma_pag
              */
             dma_set_pte_readable(*pte);
             dma_set_pte_writable(*pte);
-            iommu_flush_cache_entry(iommu, pte);
+            iommu_flush_cache_entry(pte);
         }
         else
         {
@@ -549,8 +548,6 @@ static void dma_pte_clear_one(struct dom
     struct dma_pte *pte = NULL;
     struct page_info *pg = NULL;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-
     /* get last level pte */
     pg = dma_addr_level_page(domain, addr, 1);
     if ( !pg )
@@ -560,7 +557,7 @@ static void dma_pte_clear_one(struct dom
     if ( pte )
     {
         dma_clear_pte(*pte);
-        iommu_flush_cache_entry(drhd->iommu, pte);
+        iommu_flush_cache_entry(pte);
 
         for_each_drhd_unit ( drhd )
         {
@@ -602,18 +599,13 @@ static void dma_pte_clear_range(struct d
 /* free page table pages. last level pte should already be cleared */
 void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
 {
-    struct acpi_drhd_unit *drhd;
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
     struct dma_pte *pte;
     int total = agaw_to_level(hd->agaw);
     int level;
     u64 tmp;
     struct page_info *pg = NULL;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
 
     start &= (((u64)1) << addr_width) - 1;
     end &= (((u64)1) << addr_width) - 1;
@@ -637,7 +629,7 @@ void dma_pte_free_pagetable(struct domai
             pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
             pte += address_level_offset(tmp, level);
             dma_clear_pte(*pte);
-            iommu_flush_cache_entry(iommu, pte);
+            iommu_flush_cache_entry(pte);
             unmap_domain_page(pte);
             free_domheap_page(pg);
@@ -677,7 +669,7 @@ static int iommu_set_root_entry(struct i
         return -ENOMEM;
 
     memset((u8*)root, 0, PAGE_SIZE);
-    iommu_flush_cache_page(iommu, root);
+    iommu_flush_cache_page(root);
 
     if ( cmpxchg((unsigned long *)&iommu->root_entry, 0,
                  (unsigned long)root) != 0 )
@@ -963,6 +955,8 @@ struct iommu *iommu_alloc(void *hw_data)
 {
     struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
     struct iommu *iommu;
+    unsigned long sagaw;
+    int agaw;
 
     if ( nr_iommus > MAX_IOMMUS )
     {
@@ -991,6 +985,23 @@ struct iommu *iommu_alloc(void *hw_data)
 
     iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
     iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
+
+    /* Calculate number of pagetable levels: between 2 and 4. */
+    sagaw = cap_sagaw(iommu->cap);
+    for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
+        if ( test_bit(agaw, &sagaw) )
+            break;
+    if ( agaw < 0 )
+    {
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                 "IOMMU: unsupported sagaw %lx\n", sagaw);
+        xfree(iommu);
+        return NULL;
+    }
+    iommu->nr_pt_levels = agaw_to_level(agaw);
+
+    if ( !ecap_coherent(iommu->ecap) )
+        iommus_incoherent = 1;
 
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
@@ -1025,9 +1036,6 @@ int iommu_domain_init(struct domain *dom
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct iommu *iommu = NULL;
-    int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
-    int adjust_width, agaw;
-    unsigned long sagaw;
     struct acpi_drhd_unit *drhd;
 
     spin_lock_init(&hd->mapping_lock);
@@ -1041,22 +1049,7 @@ int iommu_domain_init(struct domain *dom
     for_each_drhd_unit ( drhd )
         iommu = drhd->iommu ? : iommu_alloc(drhd);
 
-    /* calculate AGAW */
-    if (guest_width > cap_mgaw(iommu->cap))
-        guest_width = cap_mgaw(iommu->cap);
-    adjust_width = guestwidth_to_adjustwidth(guest_width);
-    agaw = width_to_agaw(adjust_width);
-    /* FIXME: hardware doesn't support it, choose a bigger one? */
-    sagaw = cap_sagaw(iommu->cap);
-    if ( !test_bit(agaw, &sagaw) )
-    {
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "IOMMU: hardware doesn't support the agaw\n");
-        agaw = find_next_bit(&sagaw, 5, agaw);
-        if ( agaw >= 5 )
-            return -ENODEV;
-    }
-    hd->agaw = agaw;
+    hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
 
     return 0;
 }
@@ -1069,6 +1062,8 @@ static int domain_context_mapping_one(
     struct context_entry *context;
     unsigned long flags;
     int ret = 0;
+    u64 pgd_maddr;
+    int agaw = -1;
 
     context = device_to_context_entry(iommu, bus, devfn);
     if ( !context )
@@ -1089,36 +1084,54 @@ static int domain_context_mapping_one(
     }
 
     spin_lock_irqsave(&iommu->lock, flags);
+
+    if ( ecap_pass_thru(iommu->ecap) )
+        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+    else
+    {
+        /* Ensure we have pagetables allocated down to leaf PTE. */
+        if ( !hd->pgd )
+        {
+            addr_to_dma_page(domain, 0);
+            if ( !hd->pgd )
+            {
+            nomem:
+                spin_unlock_irqrestore(&hd->mapping_lock, flags);
+                return -ENOMEM;
+            }
+        }
+
+        /* Skip top levels of page tables for 2- and 3-level DRHDs. */
+        pgd_maddr = virt_to_maddr(hd->pgd);
+        for ( agaw = level_to_agaw(4);
+              agaw != level_to_agaw(iommu->nr_pt_levels);
+              agaw-- )
+        {
+            if ( agaw == level_to_agaw(4) )
+                pgd_maddr = dma_pte_addr(*hd->pgd);
+            else
+            {
+                struct dma_pte *p = map_domain_page(pgd_maddr);
+                pgd_maddr = dma_pte_addr(*p);
+                unmap_domain_page(p);
+                if ( pgd_maddr == 0 )
+                    goto nomem;
+            }
+        }
+        context_set_address_root(*context, pgd_maddr);
+        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+    }
+
     /*
      * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
      * be 1 based as required by intel's iommu hw.
      */
+    BUG_ON(agaw == -1);
     context_set_domain_id(context, domain);
-    context_set_address_width(*context, hd->agaw);
-
-    if ( ecap_pass_thru(iommu->ecap) )
-        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
-    else
-    {
-        if ( !hd->pgd )
-        {
-            struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
-            if ( !pgd )
-            {
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return -ENOMEM;
-            }
-            memset(pgd, 0, PAGE_SIZE);
-            hd->pgd = pgd;
-        }
-
-        context_set_address_root(*context, virt_to_maddr(hd->pgd));
-        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
-    }
-
+    context_set_address_width(*context, agaw);
     context_set_fault_enable(*context);
     context_set_present(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
 
     gdprintk(XENLOG_INFO VTDPREFIX,
              "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
@@ -1315,7 +1328,7 @@ static int domain_context_unmap_one(
     spin_lock_irqsave(&iommu->lock, flags);
     context_clear_present(*context);
     context_clear_entry(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
     iommu_flush_context_global(iommu, 0);
     iommu_flush_iotlb_global(iommu, 0);
     spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1558,7 +1571,7 @@ int iommu_map_page(struct domain *d, pad
     pte += gfn & LEVEL_MASK;
     dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
-    iommu_flush_cache_entry(iommu, pte);
+    iommu_flush_cache_entry(pte);
     unmap_domain_page(pte);
 
     for_each_drhd_unit ( drhd )
@@ -1606,8 +1619,6 @@ int iommu_page_mapping(struct domain *do
     int index;
     struct page_info *pg = NULL;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
     if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
         return -EINVAL;
     iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
@@ -1624,7 +1635,7 @@ int iommu_page_mapping(struct domain *do
         pte += start_pfn & LEVEL_MASK;
         dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
-        iommu_flush_cache_entry(iommu, pte);
+        iommu_flush_cache_entry(pte);
         unmap_domain_page(pte);
         start_pfn++;
         index++;
@@ -1675,7 +1686,7 @@ void iommu_flush(struct domain *d, dma_a
             iommu_flush_write_buffer(iommu);
     }
 
-    iommu_flush_cache_entry(iommu, pte);
+    iommu_flush_cache_entry(pte);
 }
 
 static int iommu_prepare_rmrr_dev(
diff -r 6de4320d71f9 -r f830b47149a4 xen/include/asm-x86/hvm/vmx/intel-iommu.h
--- a/xen/include/asm-x86/hvm/vmx/intel-iommu.h        Fri Jul 25 15:04:26 2008 +0100
+++ b/xen/include/asm-x86/hvm/vmx/intel-iommu.h        Tue Jul 29 16:13:15 2008 +0100
@@ -232,6 +232,7 @@ struct context_entry {
 /* page table handling */
 #define LEVEL_STRIDE       (9)
 #define LEVEL_MASK         ((1 << LEVEL_STRIDE) - 1)
+#define level_to_agaw(val) ((val) - 2)
 #define agaw_to_level(val) ((val) + 2)
 #define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
 #define width_to_agaw(w)   ((w - 30)/LEVEL_STRIDE)
diff -r 6de4320d71f9 -r f830b47149a4 xen/include/asm-x86/iommu.h
--- a/xen/include/asm-x86/iommu.h       Fri Jul 25 15:04:26 2008 +0100
+++ b/xen/include/asm-x86/iommu.h       Tue Jul 29 16:13:15 2008 +0100
@@ -56,6 +56,7 @@ struct iommu {
     void __iomem *reg; /* Pointer to hardware regs, virtual addr */
     u32 index;         /* Sequence number of iommu */
     u32 gcmd;          /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+    u32 nr_pt_levels;
     u64 cap;
     u64 ecap;
     spinlock_t lock;   /* protect context, domain ids */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog