[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 1/2] amd-iommu: use a bitfield for PTE/PDE



On 2/4/19 5:19 AM, Paul Durrant wrote:
> The current use of get/set_field_from/in_reg_u32() is both inefficient and
> requires some ugly casting.
> 
> This patch defines a new bitfield structure (amd_iommu_pte) and uses this
> structure in all PTE/PDE manipulation, resulting in much more readable
> and compact code.
> 
> NOTE: This commit also fixes one malformed comment in
>        set_iommu_pte_present().
> 
> Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>

Sorry about the delay.

Nitpick here, but I'd rather have !!IOMMUF_{writable,readable} than 
true.  Not worth a revision if there isn't anything else though (and is 
debatable).

Acked-by: Brian Woods <brian.woods@xxxxxxx>

> ---
> Cc: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
> Cc: Brian Woods <brian.woods@xxxxxxx>
> Cc: Jan Beulich <jbeulich@xxxxxxxx>
> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
> Cc: "Roger Pau Monné" <roger.pau@xxxxxxxxxx>
> ---
>   xen/drivers/passthrough/amd/iommu_map.c       | 143 ++++--------------
>   xen/drivers/passthrough/amd/pci_amd_iommu.c   |  50 +++---
>   xen/include/asm-x86/hvm/svm/amd-iommu-defs.h  |  47 ++----
>   xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  15 --
>   4 files changed, 64 insertions(+), 191 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/amd/iommu_map.c 
> b/xen/drivers/passthrough/amd/iommu_map.c
> index 67329b0c95..5fda6063df 100644
> --- a/xen/drivers/passthrough/amd/iommu_map.c
> +++ b/xen/drivers/passthrough/amd/iommu_map.c
> @@ -38,100 +38,45 @@ static unsigned int pfn_to_pde_idx(unsigned long pfn, 
> unsigned int level)
>   static unsigned int clear_iommu_pte_present(unsigned long l1_mfn,
>                                               unsigned long dfn)
>   {
> -    uint64_t *table, *pte;
> +    struct amd_iommu_pte *table, *pte;
>       unsigned int flush_flags;
>   
>       table = map_domain_page(_mfn(l1_mfn));
> +    pte = &table[pfn_to_pde_idx(dfn, 1)];
>   
> -    pte = (table + pfn_to_pde_idx(dfn, 1));
> +    flush_flags = pte->pr ? IOMMU_FLUSHF_modified : 0;
> +    memset(pte, 0, sizeof(*pte));
>   
> -    flush_flags = get_field_from_reg_u32(*pte, IOMMU_PTE_PRESENT_MASK,
> -                                         IOMMU_PTE_PRESENT_SHIFT) ?
> -                                         IOMMU_FLUSHF_modified : 0;
> -
> -    *pte = 0;
>       unmap_domain_page(table);
>   
>       return flush_flags;
>   }
>   
> -static unsigned int set_iommu_pde_present(uint32_t *pde,
> +static unsigned int set_iommu_pde_present(struct amd_iommu_pte *pte,
>                                             unsigned long next_mfn,
>                                             unsigned int next_level, bool iw,
>                                             bool ir)
>   {
> -    uint64_t maddr_next;
> -    uint32_t addr_lo, addr_hi, entry;
> -    bool old_present;
>       unsigned int flush_flags = IOMMU_FLUSHF_added;
>   
> -    maddr_next = __pfn_to_paddr(next_mfn);
> -
> -    old_present = get_field_from_reg_u32(pde[0], IOMMU_PTE_PRESENT_MASK,
> -                                         IOMMU_PTE_PRESENT_SHIFT);
> -    if ( old_present )
> -    {
> -        bool old_r, old_w;
> -        unsigned int old_level;
> -        uint64_t maddr_old;
> -
> -        addr_hi = get_field_from_reg_u32(pde[1],
> -                                         IOMMU_PTE_ADDR_HIGH_MASK,
> -                                         IOMMU_PTE_ADDR_HIGH_SHIFT);
> -        addr_lo = get_field_from_reg_u32(pde[0],
> -                                         IOMMU_PTE_ADDR_LOW_MASK,
> -                                         IOMMU_PTE_ADDR_LOW_SHIFT);
> -        old_level = get_field_from_reg_u32(pde[0],
> -                                           IOMMU_PDE_NEXT_LEVEL_MASK,
> -                                           IOMMU_PDE_NEXT_LEVEL_SHIFT);
> -        old_w = get_field_from_reg_u32(pde[1],
> -                                       IOMMU_PTE_IO_WRITE_PERMISSION_MASK,
> -                                       IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT);
> -        old_r = get_field_from_reg_u32(pde[1],
> -                                       IOMMU_PTE_IO_READ_PERMISSION_MASK,
> -                                       IOMMU_PTE_IO_READ_PERMISSION_SHIFT);
> -
> -        maddr_old = ((uint64_t)addr_hi << 32) |
> -                    ((uint64_t)addr_lo << PAGE_SHIFT);
> -
> -        if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
> -             old_level != next_level )
> +    if ( pte->pr &&
> +         (pte->mfn != next_mfn ||
> +          pte->iw != iw ||
> +          pte->ir != ir ||
> +          pte->next_level != next_level) )
>               flush_flags |= IOMMU_FLUSHF_modified;
> -    }
>   
> -    addr_lo = maddr_next & DMA_32BIT_MASK;
> -    addr_hi = maddr_next >> 32;
> -
> -    /* enable read/write permissions,which will be enforced at the PTE */
> -    set_field_in_reg_u32(addr_hi, 0,
> -                         IOMMU_PDE_ADDR_HIGH_MASK,
> -                         IOMMU_PDE_ADDR_HIGH_SHIFT, &entry);
> -    set_field_in_reg_u32(iw, entry,
> -                         IOMMU_PDE_IO_WRITE_PERMISSION_MASK,
> -                         IOMMU_PDE_IO_WRITE_PERMISSION_SHIFT, &entry);
> -    set_field_in_reg_u32(ir, entry,
> -                         IOMMU_PDE_IO_READ_PERMISSION_MASK,
> -                         IOMMU_PDE_IO_READ_PERMISSION_SHIFT, &entry);
> -
> -    /* FC bit should be enabled in PTE, this helps to solve potential
> +    /*
> +     * FC bit should be enabled in PTE, this helps to solve potential
>        * issues with ATS devices
>        */
> -    if ( next_level == 0 )
> -        set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
> -                             IOMMU_PTE_FC_MASK, IOMMU_PTE_FC_SHIFT, &entry);
> -    pde[1] = entry;


> +    pte->fc = !next_level;
>   
> -    /* mark next level as 'present' */
> -    set_field_in_reg_u32(addr_lo >> PAGE_SHIFT, 0,
> -                         IOMMU_PDE_ADDR_LOW_MASK,
> -                         IOMMU_PDE_ADDR_LOW_SHIFT, &entry);
> -    set_field_in_reg_u32(next_level, entry,
> -                         IOMMU_PDE_NEXT_LEVEL_MASK,
> -                         IOMMU_PDE_NEXT_LEVEL_SHIFT, &entry);
> -    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
> -                         IOMMU_PDE_PRESENT_MASK,
> -                         IOMMU_PDE_PRESENT_SHIFT, &entry);
> -    pde[0] = entry;
> +    pte->mfn = next_mfn;
> +    pte->iw = iw;
> +    pte->ir = ir;
> +    pte->next_level = next_level;
> +    pte->pr = 1;
>   
>       return flush_flags;
>   }
> @@ -142,13 +87,11 @@ static unsigned int set_iommu_pte_present(unsigned long 
> pt_mfn,
>                                             int pde_level,
>                                             bool iw, bool ir)
>   {
> -    uint64_t *table;
> -    uint32_t *pde;
> +    struct amd_iommu_pte *table, *pde;
>       unsigned int flush_flags;
>   
>       table = map_domain_page(_mfn(pt_mfn));
> -
> -    pde = (uint32_t *)(table + pfn_to_pde_idx(dfn, pde_level));
> +    pde = &table[pfn_to_pde_idx(dfn, pde_level)];
>   
>       flush_flags = set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
>       unmap_domain_page(table);
> @@ -319,25 +262,6 @@ void iommu_dte_set_guest_cr3(uint32_t *dte, uint16_t 
> dom_id, uint64_t gcr3,
>       dte[1] = entry;
>   }
>   
> -uint64_t amd_iommu_get_address_from_pte(void *pte)
> -{
> -    uint32_t *entry = pte;
> -    uint32_t addr_lo, addr_hi;
> -    uint64_t ptr;
> -
> -    addr_lo = get_field_from_reg_u32(entry[0],
> -                                     IOMMU_PTE_ADDR_LOW_MASK,
> -                                     IOMMU_PTE_ADDR_LOW_SHIFT);
> -
> -    addr_hi = get_field_from_reg_u32(entry[1],
> -                                     IOMMU_PTE_ADDR_HIGH_MASK,
> -                                     IOMMU_PTE_ADDR_HIGH_SHIFT);
> -
> -    ptr = ((uint64_t)addr_hi << 32) |
> -          ((uint64_t)addr_lo << PAGE_SHIFT);
> -    return ptr;
> -}
> -
>   /* Walk io page tables and build level page tables if necessary
>    * {Re, un}mapping super page frames causes re-allocation of io
>    * page tables.
> @@ -345,7 +269,7 @@ uint64_t amd_iommu_get_address_from_pte(void *pte)
>   static int iommu_pde_from_dfn(struct domain *d, unsigned long dfn,
>                                 unsigned long pt_mfn[])
>   {
> -    uint64_t *pde, *next_table_vaddr;
> +    struct amd_iommu_pte *pde, *next_table_vaddr;
>       unsigned long  next_table_mfn;
>       unsigned int level;
>       struct page_info *table;
> @@ -370,15 +294,13 @@ static int iommu_pde_from_dfn(struct domain *d, 
> unsigned long dfn,
>           pt_mfn[level] = next_table_mfn;
>   
>           next_table_vaddr = map_domain_page(_mfn(next_table_mfn));
> -        pde = next_table_vaddr + pfn_to_pde_idx(dfn, level);
> +        pde = &next_table_vaddr[pfn_to_pde_idx(dfn, level)];
>   
>           /* Here might be a super page frame */
> -        next_table_mfn = amd_iommu_get_address_from_pte(pde) >> PAGE_SHIFT;
> +        next_table_mfn = pde->mfn;
>   
>           /* Split super page frame into smaller pieces.*/
> -        if ( iommu_is_pte_present((uint32_t *)pde) &&
> -             (iommu_next_level((uint32_t *)pde) == 0) &&
> -             next_table_mfn != 0 )
> +        if ( pde->pr && !pde->next_level && next_table_mfn )
>           {
>               int i;
>               unsigned long mfn, pfn;
> @@ -398,13 +320,13 @@ static int iommu_pde_from_dfn(struct domain *d, 
> unsigned long dfn,
>               }
>   
>               next_table_mfn = mfn_x(page_to_mfn(table));
> -            set_iommu_pde_present((uint32_t *)pde, next_table_mfn, 
> next_level,
> -                                  !!IOMMUF_writable, !!IOMMUF_readable);
> +            set_iommu_pde_present(pde, next_table_mfn, next_level, true,
> +                                  true);
>   
>               for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
>               {
>                   set_iommu_pte_present(next_table_mfn, pfn, mfn, next_level,
> -                                      !!IOMMUF_writable, !!IOMMUF_readable);
> +                                      true, true);
>                   mfn += page_sz;
>                   pfn += page_sz;
>                }
> @@ -413,7 +335,7 @@ static int iommu_pde_from_dfn(struct domain *d, unsigned 
> long dfn,
>           }
>   
>           /* Install lower level page table for non-present entries */
> -        else if ( !iommu_is_pte_present((uint32_t *)pde) )
> +        else if ( !pde->pr )
>           {
>               if ( next_table_mfn == 0 )
>               {
> @@ -425,9 +347,8 @@ static int iommu_pde_from_dfn(struct domain *d, unsigned 
> long dfn,
>                       return 1;
>                   }
>                   next_table_mfn = mfn_x(page_to_mfn(table));
> -                set_iommu_pde_present((uint32_t *)pde, next_table_mfn,
> -                                      next_level, !!IOMMUF_writable,
> -                                      !!IOMMUF_readable);
> +                set_iommu_pde_present(pde, next_table_mfn, next_level, true,
> +                                      true);
>               }
>               else /* should never reach here */
>               {
> @@ -455,7 +376,7 @@ static int update_paging_mode(struct domain *d, unsigned 
> long dfn)
>       struct amd_iommu *iommu = NULL;
>       struct page_info *new_root = NULL;
>       struct page_info *old_root = NULL;
> -    void *new_root_vaddr;
> +    struct amd_iommu_pte *new_root_vaddr;
>       unsigned long old_root_mfn;
>       struct domain_iommu *hd = dom_iommu(d);
>   
> @@ -484,7 +405,7 @@ static int update_paging_mode(struct domain *d, unsigned 
> long dfn)
>           new_root_vaddr = __map_domain_page(new_root);
>           old_root_mfn = mfn_x(page_to_mfn(old_root));
>           set_iommu_pde_present(new_root_vaddr, old_root_mfn, level,
> -                              !!IOMMUF_writable, !!IOMMUF_readable);
> +                              true, true);
>           level++;
>           old_root = new_root;
>           offset >>= PTE_PER_TABLE_SHIFT;
> diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c 
> b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> index 33a3798f36..da6748320b 100644
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -376,9 +376,8 @@ static void deallocate_next_page_table(struct page_info 
> *pg, int level)
>   
>   static void deallocate_page_table(struct page_info *pg)
>   {
> -    void *table_vaddr, *pde;
> -    u64 next_table_maddr;
> -    unsigned int index, level = PFN_ORDER(pg), next_level;
> +    struct amd_iommu_pte *table_vaddr;
> +    unsigned int index, level = PFN_ORDER(pg);
>   
>       PFN_ORDER(pg) = 0;
>   
> @@ -392,17 +391,14 @@ static void deallocate_page_table(struct page_info *pg)
>   
>       for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
>       {
> -        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
> -        next_table_maddr = amd_iommu_get_address_from_pte(pde);
> -        next_level = iommu_next_level(pde);
> +        struct amd_iommu_pte *pde = &table_vaddr[index];
>   
> -        if ( (next_table_maddr != 0) && (next_level != 0) &&
> -             iommu_is_pte_present(pde) )
> +        if ( pde->mfn && pde->next_level && pde->pr )
>           {
>               /* We do not support skip levels yet */
> -            ASSERT(next_level == level - 1);
> -            deallocate_next_page_table(maddr_to_page(next_table_maddr),
> -                                       next_level);
> +            ASSERT(pde->next_level == level - 1);
> +            deallocate_next_page_table(mfn_to_page(_mfn(pde->mfn)),
> +                                       pde->next_level);
>           }
>       }
>   
> @@ -500,10 +496,8 @@ static void amd_dump_p2m_table_level(struct page_info* 
> pg, int level,
>                                        paddr_t gpa, int indent)
>   {
>       paddr_t address;
> -    void *table_vaddr, *pde;
> -    paddr_t next_table_maddr;
> -    int index, next_level, present;
> -    u32 *entry;
> +    struct amd_iommu_pte *table_vaddr;
> +    int index;
>   
>       if ( level < 1 )
>           return;
> @@ -518,42 +512,32 @@ static void amd_dump_p2m_table_level(struct page_info* 
> pg, int level,
>   
>       for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
>       {
> +        struct amd_iommu_pte *pde = &table_vaddr[index];
> +
>           if ( !(index % 2) )
>               process_pending_softirqs();
>   
> -        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
> -        next_table_maddr = amd_iommu_get_address_from_pte(pde);
> -        entry = pde;
> -
> -        present = get_field_from_reg_u32(entry[0],
> -                                         IOMMU_PDE_PRESENT_MASK,
> -                                         IOMMU_PDE_PRESENT_SHIFT);
> -
> -        if ( !present )
> +        if ( !pde->pr )
>               continue;
>   
> -        next_level = get_field_from_reg_u32(entry[0],
> -                                            IOMMU_PDE_NEXT_LEVEL_MASK,
> -                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
> -
> -        if ( next_level && (next_level != (level - 1)) )
> +        if ( pde->next_level && (pde->next_level != (level - 1)) )
>           {
>               printk("IOMMU p2m table error. next_level = %d, expected %d\n",
> -                   next_level, level - 1);
> +                   pde->next_level, level - 1);
>   
>               continue;
>           }
>   
>           address = gpa + amd_offset_level_address(index, level);
> -        if ( next_level >= 1 )
> +        if ( pde->next_level >= 1 )
>               amd_dump_p2m_table_level(
> -                maddr_to_page(next_table_maddr), next_level,
> +                mfn_to_page(_mfn(pde->mfn)), pde->next_level,
>                   address, indent + 1);
>           else
>               printk("%*sdfn: %08lx  mfn: %08lx\n",
>                      indent, "",
>                      (unsigned long)PFN_DOWN(address),
> -                   (unsigned long)PFN_DOWN(next_table_maddr));
> +                   (unsigned long)PFN_DOWN(pfn_to_paddr(pde->mfn)));
>       }
>   
>       unmap_domain_page(table_vaddr);
> diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h 
> b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> index a217245249..a3a49f91eb 100644
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> @@ -413,38 +413,21 @@
>   #define IOMMU_PAGE_TABLE_U32_PER_ENTRY      (IOMMU_PAGE_TABLE_ENTRY_SIZE / 
> 4)
>   #define IOMMU_PAGE_TABLE_ALIGNMENT  4096
>   
> -#define IOMMU_PTE_PRESENT_MASK                       0x00000001
> -#define IOMMU_PTE_PRESENT_SHIFT                      0
> -#define IOMMU_PTE_NEXT_LEVEL_MASK            0x00000E00
> -#define IOMMU_PTE_NEXT_LEVEL_SHIFT           9
> -#define IOMMU_PTE_ADDR_LOW_MASK                      0xFFFFF000
> -#define IOMMU_PTE_ADDR_LOW_SHIFT             12
> -#define IOMMU_PTE_ADDR_HIGH_MASK             0x000FFFFF
> -#define IOMMU_PTE_ADDR_HIGH_SHIFT            0
> -#define IOMMU_PTE_U_MASK                     0x08000000
> -#define IOMMU_PTE_U_SHIFT                    7
> -#define IOMMU_PTE_FC_MASK                    0x10000000
> -#define IOMMU_PTE_FC_SHIFT                   28
> -#define IOMMU_PTE_IO_READ_PERMISSION_MASK    0x20000000
> -#define IOMMU_PTE_IO_READ_PERMISSION_SHIFT   29
> -#define IOMMU_PTE_IO_WRITE_PERMISSION_MASK   0x40000000
> -#define IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT  30
> -
> -/* I/O Page Directory */
> -#define IOMMU_PAGE_DIRECTORY_ENTRY_SIZE              8
> -#define IOMMU_PAGE_DIRECTORY_ALIGNMENT               4096
> -#define IOMMU_PDE_PRESENT_MASK                       0x00000001
> -#define IOMMU_PDE_PRESENT_SHIFT                      0
> -#define IOMMU_PDE_NEXT_LEVEL_MASK            0x00000E00
> -#define IOMMU_PDE_NEXT_LEVEL_SHIFT           9
> -#define IOMMU_PDE_ADDR_LOW_MASK                      0xFFFFF000
> -#define IOMMU_PDE_ADDR_LOW_SHIFT             12
> -#define IOMMU_PDE_ADDR_HIGH_MASK             0x000FFFFF
> -#define IOMMU_PDE_ADDR_HIGH_SHIFT            0
> -#define IOMMU_PDE_IO_READ_PERMISSION_MASK    0x20000000
> -#define IOMMU_PDE_IO_READ_PERMISSION_SHIFT   29
> -#define IOMMU_PDE_IO_WRITE_PERMISSION_MASK   0x40000000
> -#define IOMMU_PDE_IO_WRITE_PERMISSION_SHIFT  30
> +struct amd_iommu_pte {
> +    uint64_t pr:1;
> +    uint64_t ignored0:4;
> +    uint64_t a:1;
> +    uint64_t d:1;
> +    uint64_t ignored1:2;
> +    uint64_t next_level:3;
> +    uint64_t mfn:40;
> +    uint64_t reserved:7;
> +    uint64_t u:1;
> +    uint64_t fc:1;
> +    uint64_t ir:1;
> +    uint64_t iw:1;
> +    uint64_t ignored2:1;
> +};
>   
>   /* Paging modes */
>   #define IOMMU_PAGING_MODE_DISABLED  0x0
> diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h 
> b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> index c5697565d6..1c1971bb7c 100644
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
> @@ -57,7 +57,6 @@ int __must_check amd_iommu_map_page(struct domain *d, dfn_t 
> dfn,
>                                       unsigned int *flush_flags);
>   int __must_check amd_iommu_unmap_page(struct domain *d, dfn_t dfn,
>                                         unsigned int *flush_flags);
> -uint64_t amd_iommu_get_address_from_pte(void *entry);
>   int __must_check amd_iommu_alloc_root(struct domain_iommu *hd);
>   int amd_iommu_reserve_domain_unity_map(struct domain *domain,
>                                          paddr_t phys_addr, unsigned long 
> size,
> @@ -280,18 +279,4 @@ static inline void iommu_set_addr_hi_to_reg(uint32_t 
> *reg, uint32_t addr)
>                            IOMMU_REG_BASE_ADDR_HIGH_SHIFT, reg);
>   }
>   
> -static inline int iommu_is_pte_present(const u32 *entry)
> -{
> -    return get_field_from_reg_u32(entry[0],
> -                                  IOMMU_PDE_PRESENT_MASK,
> -                                  IOMMU_PDE_PRESENT_SHIFT);
> -}
> -
> -static inline unsigned int iommu_next_level(const u32 *entry)
> -{
> -    return get_field_from_reg_u32(entry[0],
> -                                  IOMMU_PDE_NEXT_LEVEL_MASK,
> -                                  IOMMU_PDE_NEXT_LEVEL_SHIFT);
> -}
> -
>   #endif /* _ASM_X86_64_AMD_IOMMU_PROTO_H */
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.