[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH RFC v2 1/1] swiotlb: split buffer into 32-bit default and 64-bit extra zones



Hello,

I previously sent out RFC v1, which introduced an extra io_tlb_mem (created
with SWIOTLB_ANY) in addition to the default io_tlb_mem (32-bit). The
dev->dma_io_tlb_mem was set to either the default or the extra io_tlb_mem,
depending on the dma mask. However, as suggested by Christoph Hellwig, that
approach does not work well for setting dev->dma_io_tlb_mem transparently
at the swiotlb layer.

https://lore.kernel.org/all/20220609005553.30954-1-dongli.zhang@xxxxxxxxxx/

Therefore, this is another RFC v2 implementation following a different
direction. The core ideas are:

1. The swiotlb is split into two zones, io_tlb_mem->zone[0] (32-bit) and
io_tlb_mem->zone[1] (64-bit).

struct io_tlb_mem {
        struct io_tlb_zone zone[SWIOTLB_NR];
        struct dentry *debugfs;
        bool late_alloc;
        bool force_bounce;
        bool for_alloc;
        bool has_extra;
};

struct io_tlb_zone {
        phys_addr_t start;
        phys_addr_t end;
        void *vaddr;
        unsigned long nslabs;
        unsigned long used;
        unsigned int nareas;
        unsigned int area_nslabs;
        struct io_tlb_area *areas;
        struct io_tlb_slot *slots;
};

2. By default, only io_tlb_mem->zone[0] is available. The
io_tlb_mem->zone[1] is allocated conditionally if:

- the "swiotlb=" parameter is configured to allocate the extra buffer, and
- the SWIOTLB_EXTRA bit is set in the flags (this is to make sure arch(s)
  other than x86/sev/xen will not enable it until it has been fully tested
  on each arch, e.g., mips/powerpc). Currently it is enabled for x86 and xen.

3. During swiotlb map, whether zone[0] (32-bit) or zone[1] (64-bit,
SWIOTLB_ANY) is used depends on
min_not_zero(*dev->dma_mask, dev->bus_dma_limit).

To test the RFC v2, here is the QEMU command line.

qemu-system-x86_64 -smp 8 -m 32G -enable-kvm -vnc :5 -hda disk.img \
-kernel path-to-linux/arch/x86_64/boot/bzImage \
-append "root=/dev/sda1 init=/sbin/init text console=ttyS0 loglevel=7 
swiotlb=32768,4194304,force" \
-net nic -net user,hostfwd=tcp::5025-:22 \
-device nvme,drive=nvme01,serial=helloworld -drive 
file=test.qcow2,if=none,id=nvme01 \
-serial stdio

The following appears in the syslog. The extra 8GB buffer is allocated.

[    0.152251] software IO TLB: area num 8.
... ...
[    3.706088] PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
[    3.707334] software IO TLB: mapped default [mem 
0x00000000bbfd7000-0x00000000bffd7000] (64MB)
[    3.708585] software IO TLB: mapped extra [mem 
0x000000061cc00000-0x000000081cc00000] (8192MB)

After the FIO is triggered over NVMe, the 64-bit buffer is used.

$ cat /sys/kernel/debug/swiotlb/io_tlb_nslabs_extra
4194304
$ cat /sys/kernel/debug/swiotlb/io_tlb_used_extra
327552

Would you mind helping confirm whether this is the right direction to go?

Thank you very much!

Cc: Konrad Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Joe Jin <joe.jin@xxxxxxxxxx>
Signed-off-by: Dongli Zhang <dongli.zhang@xxxxxxxxxx>
---
 arch/arm/xen/mm.c                      |   2 +-
 arch/mips/pci/pci-octeon.c             |   5 +-
 arch/x86/include/asm/xen/swiotlb-xen.h |   2 +-
 arch/x86/kernel/pci-dma.c              |   6 +-
 drivers/xen/swiotlb-xen.c              |  18 +-
 include/linux/swiotlb.h                |  73 +++--
 kernel/dma/swiotlb.c                   | 499 +++++++++++++++++++++------------
 7 files changed, 388 insertions(+), 217 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 3d826c0..4edfa42 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -125,7 +125,7 @@ static int __init xen_mm_init(void)
                return 0;
 
        /* we can work with the default swiotlb */
-       if (!io_tlb_default_mem.nslabs) {
+       if (!io_tlb_default_mem.zone[SWIOTLB_DF].nslabs) {
                rc = swiotlb_init_late(swiotlb_size_or_default(),
                                       xen_swiotlb_gfp(), NULL);
                if (rc < 0)
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index e457a18..0bf0859 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -654,6 +654,9 @@ static int __init octeon_pci_setup(void)
                octeon_pci_mem_resource.end =
                        octeon_pci_mem_resource.start + (1ul << 30);
        } else {
+               struct io_tlb_mem *mem = &io_tlb_default_mem;
+               struct io_tlb_zone *zone = &mem->zone[SWIOTLB_DF];
+
                /* Remap the Octeon BAR 0 to map 128MB-(128MB+4KB) */
                octeon_npi_write32(CVMX_NPI_PCI_CFG04, 128ul << 20);
                octeon_npi_write32(CVMX_NPI_PCI_CFG05, 0);
@@ -664,7 +667,7 @@ static int __init octeon_pci_setup(void)
 
                /* BAR1 movable regions contiguous to cover the swiotlb */
                octeon_bar1_pci_phys =
-                       io_tlb_default_mem.start & ~((1ull << 22) - 1);
+                       zone->start & ~((1ull << 22) - 1);
 
                for (index = 0; index < 32; index++) {
                        union cvmx_pci_bar1_indexx bar1_index;
diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h 
b/arch/x86/include/asm/xen/swiotlb-xen.h
index 77a2d19..9476a9f 100644
--- a/arch/x86/include/asm/xen/swiotlb-xen.h
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -8,7 +8,7 @@
 static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
 #endif
 
-int xen_swiotlb_fixup(void *buf, unsigned long nslabs);
+int xen_swiotlb_fixup(void *buf, unsigned long nslabs, unsigned int flags);
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
                                unsigned int address_bits,
                                dma_addr_t *dma_handle);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 30bbe4a..a8736e7 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -77,7 +77,7 @@ static void __init pci_xen_swiotlb_init(void)
        if (!xen_initial_domain() && !x86_swiotlb_enable)
                return;
        x86_swiotlb_enable = true;
-       x86_swiotlb_flags |= SWIOTLB_ANY;
+       x86_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_EXTRA;
        swiotlb_init_remap(true, x86_swiotlb_flags, xen_swiotlb_fixup);
        dma_ops = &xen_swiotlb_dma_ops;
        if (IS_ENABLED(CONFIG_PCI))
@@ -90,7 +90,7 @@ int pci_xen_swiotlb_init_late(void)
                return 0;
 
        /* we can work with the default swiotlb */
-       if (!io_tlb_default_mem.nslabs) {
+       if (!io_tlb_default_mem.zone[SWIOTLB_DF].nslabs) {
                int rc = swiotlb_init_late(swiotlb_size_or_default(),
                                           GFP_KERNEL, xen_swiotlb_fixup);
                if (rc < 0)
@@ -120,6 +120,8 @@ void __init pci_iommu_alloc(void)
        gart_iommu_hole_init();
        amd_iommu_detect();
        detect_intel_iommu();
+
+       x86_swiotlb_flags |= SWIOTLB_EXTRA;
        swiotlb_init(x86_swiotlb_enable, x86_swiotlb_flags);
 }
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 67aa74d..3ff082b 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -38,7 +38,6 @@
 #include <asm/dma-mapping.h>
 
 #include <trace/events/swiotlb.h>
-#define MAX_DMA_BITS 32
 
 /*
  * Quick lookup value of the bus address of the IOTLB.
@@ -104,24 +103,32 @@ static int is_xen_swiotlb_buffer(struct device *dev, 
dma_addr_t dma_addr)
 }
 
 #ifdef CONFIG_X86
-int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
+int xen_swiotlb_fixup(void *buf, unsigned long nslabs, unsigned int flags)
 {
        int rc;
        unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
        unsigned int i, dma_bits = order + PAGE_SHIFT;
        dma_addr_t dma_handle;
        phys_addr_t p = virt_to_phys(buf);
+       unsigned int max_dma_bits = 32;
 
        BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1));
        BUG_ON(nslabs % IO_TLB_SEGSIZE);
 
+#ifdef CONFIG_X86_64
+       if (flags & SWIOTLB_EXTRA) {
+               max_dma_bits = 64;
+               dma_bits = 64;
+       }
+#endif
+
        i = 0;
        do {
                do {
                        rc = xen_create_contiguous_region(
                                p + (i << IO_TLB_SHIFT), order,
                                dma_bits, &dma_handle);
-               } while (rc && dma_bits++ < MAX_DMA_BITS);
+               } while (rc && dma_bits++ < max_dma_bits);
                if (rc)
                        return rc;
 
@@ -381,7 +388,10 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, 
dma_addr_t dev_addr,
 static int
 xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-       return xen_phys_to_dma(hwdev, io_tlb_default_mem.end - 1) <= mask;
+       struct io_tlb_mem *mem = &io_tlb_default_mem;
+       struct io_tlb_zone *zone = &mem->zone[SWIOTLB_DF];
+
+       return xen_phys_to_dma(hwdev, zone->end - 1) <= mask;
 }
 
 const struct dma_map_ops xen_swiotlb_dma_ops = {
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 35bc4e2..365e5d7 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -16,6 +16,11 @@
 #define SWIOTLB_VERBOSE        (1 << 0) /* verbose initialization */
 #define SWIOTLB_FORCE  (1 << 1) /* force bounce buffering */
 #define SWIOTLB_ANY    (1 << 2) /* allow any memory for the buffer */
+#define SWIOTLB_EXTRA  (1 << 3) /* allow extra buffer if supported */
+
+#define SWIOTLB_DF     0
+#define SWIOTLB_EX     1
+#define SWIOTLB_NR     2
 
 /*
  * Maximum allowable number of contiguous slabs to map,
@@ -36,9 +41,9 @@
 
 unsigned long swiotlb_size_or_default(void);
 void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
-       int (*remap)(void *tlb, unsigned long nslabs));
+       int (*remap)(void *tlb, unsigned long nslabs, unsigned int flags));
 int swiotlb_init_late(size_t size, gfp_t gfp_mask,
-       int (*remap)(void *tlb, unsigned long nslabs));
+       int (*remap)(void *tlb, unsigned long nslabs, unsigned int flags));
 extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
@@ -62,54 +67,78 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 #ifdef CONFIG_SWIOTLB
 
 /**
- * struct io_tlb_mem - IO TLB Memory Pool Descriptor
+ * struct io_tlb_zone - IO TLB Memory Zone Descriptor
  *
- * @start:     The start address of the swiotlb memory pool. Used to do a quick
+ * @start:     The start address of the swiotlb memory zone. Used to do a quick
  *             range check to see if the memory was in fact allocated by this
  *             API.
- * @end:       The end address of the swiotlb memory pool. Used to do a quick
+ * @end:       The end address of the swiotlb memory zone. Used to do a quick
  *             range check to see if the memory was in fact allocated by this
  *             API.
- * @vaddr:     The vaddr of the swiotlb memory pool. The swiotlb memory pool
+ * @vaddr:     The vaddr of the swiotlb memory zone. The swiotlb memory zone
  *             may be remapped in the memory encrypted case and store virtual
  *             address for bounce buffer operation.
  * @nslabs:    The number of IO TLB blocks (in groups of 64) between @start and
  *             @end. For default swiotlb, this is command line adjustable via
  *             setup_io_tlb_npages.
  * @used:      The number of used IO TLB block.
- * @list:      The free list describing the number of free entries available
- *             from each index.
- * @orig_addr: The original address corresponding to a mapped entry.
- * @alloc_size:        Size of the allocated buffer.
- * @debugfs:   The dentry to debugfs.
- * @late_alloc:        %true if allocated using the page allocator
- * @force_bounce: %true if swiotlb bouncing is forced
- * @for_alloc:  %true if the pool is used for memory allocation
- * @nareas:  The area number in the pool.
- * @area_nslabs: The slot number in the area.
+ * @nareas:    The area number in the zone.
+ * @area_nslabs:The slot number in the area.
+ * @area:      The IO TLB memory area descriptor.
+ * @slots:     The data to track memory allocation.
  */
-struct io_tlb_mem {
+struct io_tlb_zone {
        phys_addr_t start;
        phys_addr_t end;
        void *vaddr;
        unsigned long nslabs;
        unsigned long used;
-       struct dentry *debugfs;
-       bool late_alloc;
-       bool force_bounce;
-       bool for_alloc;
        unsigned int nareas;
        unsigned int area_nslabs;
        struct io_tlb_area *areas;
        struct io_tlb_slot *slots;
 };
+
+/**
+ * struct io_tlb_mem - IO TLB Memory Pool Descriptor
+ *
+ * @zone:      IO TLB memory zone descriptor.
+ * @debugfs:   The dentry to debugfs.
+ * @late_alloc:        %true if allocated using the page allocator.
+ * @force_bounce: %true if swiotlb bouncing is forced.
+ * @for_alloc: %true if the pool is used for memory allocation.
+ * @has_extra: %true if the extra zone (e.g., 64-bit) is available.
+ */
+struct io_tlb_mem {
+       struct io_tlb_zone zone[SWIOTLB_NR];
+       struct dentry *debugfs;
+       bool late_alloc;
+       bool force_bounce;
+       bool for_alloc;
+       bool has_extra;
+};
 extern struct io_tlb_mem io_tlb_default_mem;
 
 static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
        struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+       struct io_tlb_zone *zone;
+       int end_zone;
+       int i;
+
+       if (!mem)
+               return false;
+
+       end_zone = mem->has_extra ? SWIOTLB_EX : SWIOTLB_DF;
 
-       return mem && paddr >= mem->start && paddr < mem->end;
+       for (i = end_zone ; i >= 0 ; i--) {
+               zone = &mem->zone[i];
+
+               if (paddr >= zone->start && paddr < zone->end)
+                       return true;
+       }
+
+       return false;
 }
 
 static inline bool is_swiotlb_force_bounce(struct device *dev)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index c5a9190..392f21b 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -76,6 +76,7 @@ struct io_tlb_slot {
 phys_addr_t swiotlb_unencrypted_base;
 
 static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
+static unsigned long extra_nslabs;
 static unsigned long default_nareas;
 
 /**
@@ -96,24 +97,24 @@ struct io_tlb_area {
 
 /*
  * Round up number of slabs to the next power of 2. The last area is going
- * be smaller than the rest if default_nslabs is not power of two.
+ * be smaller than the rest if *nslabs_ptr is not power of two.
  * The number of slot in an area should be a multiple of IO_TLB_SEGSIZE,
  * otherwise a segment may span two or more areas. It conflicts with free
  * contiguous slots tracking: free slots are treated contiguous no matter
  * whether they cross an area boundary.
  *
- * Return true if default_nslabs is rounded up.
+ * Return true if *nslabs_ptr is rounded up.
  */
-static bool round_up_default_nslabs(void)
+static bool round_up_nslabs_config(unsigned long *nslabs_ptr)
 {
        if (!default_nareas)
                return false;
 
-       if (default_nslabs < IO_TLB_SEGSIZE * default_nareas)
-               default_nslabs = IO_TLB_SEGSIZE * default_nareas;
-       else if (is_power_of_2(default_nslabs))
+       if (*nslabs_ptr < IO_TLB_SEGSIZE * default_nareas)
+               *nslabs_ptr = IO_TLB_SEGSIZE * default_nareas;
+       else if (is_power_of_2(*nslabs_ptr))
                return false;
-       default_nslabs = roundup_pow_of_two(default_nslabs);
+       *nslabs_ptr = roundup_pow_of_two(*nslabs_ptr);
        return true;
 }
 
@@ -128,9 +129,14 @@ static void swiotlb_adjust_nareas(unsigned int nareas)
        default_nareas = nareas;
 
        pr_info("area num %d.\n", nareas);
-       if (round_up_default_nslabs())
+
+       if (round_up_nslabs_config(&default_nslabs))
                pr_info("SWIOTLB bounce buffer size roundup to %luMB",
                        (default_nslabs << IO_TLB_SHIFT) >> 20);
+
+       if (extra_nslabs && round_up_nslabs_config(&extra_nslabs))
+               pr_info("SWIOTLB extra bounce buffer size roundup to %luMB",
+                       (extra_nslabs << IO_TLB_SHIFT) >> 20);
 }
 
 static int __init
@@ -144,6 +150,11 @@ static void swiotlb_adjust_nareas(unsigned int nareas)
        if (*str == ',')
                ++str;
        if (isdigit(*str))
+               extra_nslabs =
+                       ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE);
+       if (*str == ',')
+               ++str;
+       if (isdigit(*str))
                swiotlb_adjust_nareas(simple_strtoul(str, &str, 0));
        if (*str == ',')
                ++str;
@@ -158,9 +169,11 @@ static void swiotlb_adjust_nareas(unsigned int nareas)
 
 unsigned int swiotlb_max_segment(void)
 {
-       if (!io_tlb_default_mem.nslabs)
+       struct io_tlb_zone *zone = &io_tlb_default_mem.zone[SWIOTLB_DF];
+
+       if (!zone->nslabs)
                return 0;
-       return rounddown(io_tlb_default_mem.nslabs << IO_TLB_SHIFT, PAGE_SIZE);
+       return rounddown(zone->nslabs << IO_TLB_SHIFT, PAGE_SIZE);
 }
 EXPORT_SYMBOL_GPL(swiotlb_max_segment);
 
@@ -181,22 +194,49 @@ void __init swiotlb_adjust_size(unsigned long size)
 
        size = ALIGN(size, IO_TLB_SIZE);
        default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
-       if (round_up_default_nslabs())
+       if (round_up_nslabs_config(&default_nslabs))
                size = default_nslabs << IO_TLB_SHIFT;
-       pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
+       pr_info("SWIOTLB default bounce buffer size adjusted to %luMB",
+               size >> 20);
 }
 
 void swiotlb_print_info(void)
 {
        struct io_tlb_mem *mem = &io_tlb_default_mem;
+       int nr_zone = mem->has_extra ? SWIOTLB_NR : SWIOTLB_NR - 1;
+       struct io_tlb_zone *zone;
+       int i;
 
-       if (!mem->nslabs) {
+       if (!mem->zone[SWIOTLB_DF].nslabs) {
                pr_warn("No low mem\n");
                return;
        }
 
-       pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end,
-              (mem->nslabs << IO_TLB_SHIFT) >> 20);
+       for (i = 0; i < nr_zone; i++) {
+               zone = &mem->zone[i];
+               pr_info("mapped %s [mem %pa-%pa] (%luMB)\n",
+                       i == 0 ? "default" : "extra",
+                       &zone->start, &zone->end,
+                       (zone->nslabs << IO_TLB_SHIFT) >> 20);
+       }
+}
+
+static int swiotlb_zone_index(struct io_tlb_mem *mem, phys_addr_t paddr)
+{
+       struct io_tlb_zone *zone;
+       int end_zone;
+       int i;
+
+       end_zone = mem->has_extra ? SWIOTLB_EX : SWIOTLB_DF;
+
+       for (i = end_zone ; i >= 0 ; i--) {
+               zone = &mem->zone[i];
+
+               if (paddr >= zone->start && paddr < zone->end)
+                       return i;
+       }
+
+       return -EINVAL;
 }
 
 static inline unsigned long io_tlb_offset(unsigned long val)
@@ -215,12 +255,13 @@ static inline unsigned long nr_slots(u64 val)
  * Isolation VMs).
  */
 #ifdef CONFIG_HAS_IOMEM
-static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+static void *swiotlb_mem_remap(struct io_tlb_zone *zone,
+                              unsigned long bytes)
 {
        void *vaddr = NULL;
 
        if (swiotlb_unencrypted_base) {
-               phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
+               phys_addr_t paddr = zone->start + swiotlb_unencrypted_base;
 
                vaddr = memremap(paddr, bytes, MEMREMAP_WB);
                if (!vaddr)
@@ -231,7 +272,8 @@ static void *swiotlb_mem_remap(struct io_tlb_mem *mem, 
unsigned long bytes)
        return vaddr;
 }
 #else
-static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+static void *swiotlb_mem_remap(struct io_tlb_zone *zone,
+                              unsigned long bytes)
 {
        return NULL;
 }
@@ -246,46 +288,53 @@ static void *swiotlb_mem_remap(struct io_tlb_mem *mem, 
unsigned long bytes)
 void __init swiotlb_update_mem_attributes(void)
 {
        struct io_tlb_mem *mem = &io_tlb_default_mem;
+       int nr_zone = mem->has_extra ? SWIOTLB_NR : SWIOTLB_NR - 1;
        void *vaddr;
        unsigned long bytes;
+       int i;
 
-       if (!mem->nslabs || mem->late_alloc)
+       if (mem->late_alloc)
                return;
-       vaddr = phys_to_virt(mem->start);
-       bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
-       set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 
-       mem->vaddr = swiotlb_mem_remap(mem, bytes);
-       if (!mem->vaddr)
-               mem->vaddr = vaddr;
+       for (i = 0; i < nr_zone; i++) {
+               struct io_tlb_zone *zone = &mem->zone[i];
+
+               if (!zone->nslabs)
+                       continue;
+               vaddr = phys_to_virt(zone->start);
+               bytes = PAGE_ALIGN(zone->nslabs << IO_TLB_SHIFT);
+               set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+
+               zone->vaddr = swiotlb_mem_remap(zone, bytes);
+               if (!zone->vaddr)
+                       zone->vaddr = vaddr;
+       }
 }
 
-static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
-               unsigned long nslabs, unsigned int flags,
-               bool late_alloc, unsigned int nareas)
+static void swiotlb_init_io_tlb_zone(struct io_tlb_zone *zone,
+                                    phys_addr_t start,
+                                    unsigned long nslabs,
+                                    unsigned int nareas)
 {
        void *vaddr = phys_to_virt(start);
        unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
 
-       mem->nslabs = nslabs;
-       mem->start = start;
-       mem->end = mem->start + bytes;
-       mem->late_alloc = late_alloc;
-       mem->nareas = nareas;
-       mem->area_nslabs = nslabs / mem->nareas;
-
-       mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
+       zone->nslabs = nslabs;
+       zone->start = start;
+       zone->end = zone->start + bytes;
+       zone->nareas = nareas;
+       zone->area_nslabs = nslabs / zone->nareas;
 
-       for (i = 0; i < mem->nareas; i++) {
-               spin_lock_init(&mem->areas[i].lock);
-               mem->areas[i].index = 0;
-               mem->areas[i].used = 0;
+       for (i = 0; i < zone->nareas; i++) {
+               spin_lock_init(&zone->areas[i].lock);
+               zone->areas[i].index = 0;
+               zone->areas[i].used = 0;
        }
 
-       for (i = 0; i < mem->nslabs; i++) {
-               mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
-               mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
-               mem->slots[i].alloc_size = 0;
+       for (i = 0; i < zone->nslabs; i++) {
+               zone->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
+               zone->slots[i].orig_addr = INVALID_PHYS_ADDR;
+               zone->slots[i].alloc_size = 0;
        }
 
        /*
@@ -296,44 +345,19 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem 
*mem, phys_addr_t start,
                return;
 
        memset(vaddr, 0, bytes);
-       mem->vaddr = vaddr;
+       zone->vaddr = vaddr;
        return;
 }
 
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
-               int (*remap)(void *tlb, unsigned long nslabs))
+void __init swiotlb_init_remap_zone(struct io_tlb_zone *zone,
+               unsigned long nslabs, unsigned int flags,
+               int (*remap)(void *tlb, unsigned long nslabs,
+                            unsigned int flags))
 {
-       struct io_tlb_mem *mem = &io_tlb_default_mem;
-       unsigned long nslabs;
        size_t alloc_size;
        size_t bytes;
        void *tlb;
 
-       if (!addressing_limit && !swiotlb_force_bounce)
-               return;
-       if (swiotlb_force_disable)
-               return;
-
-       /*
-        * default_nslabs maybe changed when adjust area number.
-        * So allocate bounce buffer after adjusting area number.
-        */
-       if (!default_nareas)
-               swiotlb_adjust_nareas(num_possible_cpus());
-
-       nslabs = default_nslabs;
-       if (nslabs < IO_TLB_MIN_SLABS)
-               panic("%s: nslabs = %lu too small\n", __func__, nslabs);
-
-       /*
-        * By default allocate the bounce buffer memory from low memory, but
-        * allow to pick a location everywhere for hypervisors with guest
-        * memory encryption.
-        */
 retry:
        bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
        if (flags & SWIOTLB_ANY)
@@ -346,7 +370,7 @@ void __init swiotlb_init_remap(bool addressing_limit, 
unsigned int flags,
                return;
        }
 
-       if (remap && remap(tlb, nslabs) < 0) {
+       if (remap && remap(tlb, nslabs, flags) < 0) {
                memblock_free(tlb, PAGE_ALIGN(bytes));
 
                nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
@@ -356,19 +380,58 @@ void __init swiotlb_init_remap(bool addressing_limit, 
unsigned int flags,
                goto retry;
        }
 
-       alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
-       mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
-       if (!mem->slots)
+       alloc_size = PAGE_ALIGN(array_size(sizeof(*zone->slots), nslabs));
+       zone->slots = memblock_alloc(alloc_size, PAGE_SIZE);
+       if (!zone->slots)
                panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
                      __func__, alloc_size, PAGE_SIZE);
 
-       mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area),
+       zone->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area),
                default_nareas), SMP_CACHE_BYTES);
-       if (!mem->areas)
-               panic("%s: Failed to allocate mem->areas.\n", __func__);
+       if (!zone->areas)
+               panic("%s: Failed to allocate zone->areas.\n", __func__);
+
+       swiotlb_init_io_tlb_zone(zone, __pa(tlb), nslabs, default_nareas);
+}
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+               int (*remap)(void *tlb, unsigned long nslabs,
+                            unsigned int flags))
+{
+       struct io_tlb_mem *mem = &io_tlb_default_mem;
 
-       swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false,
-                               default_nareas);
+       if (!addressing_limit && !swiotlb_force_bounce)
+               return;
+       if (swiotlb_force_disable)
+               return;
+
+       /*
+        * Both default_nslabs and extra_nslabs maybe changed when adjust
+        * area number.
+        * So allocate bounce buffer after adjusting area number.
+        */
+       if (!default_nareas)
+               swiotlb_adjust_nareas(num_possible_cpus());
+
+       if (default_nslabs < IO_TLB_MIN_SLABS)
+               panic("%s: default_nslabs = %lu too small\n", __func__,
+                     default_nslabs);
+
+       swiotlb_init_remap_zone(&mem->zone[SWIOTLB_DF], default_nslabs,
+                               flags & ~SWIOTLB_EXTRA, remap);
+       if (extra_nslabs && (flags & SWIOTLB_EXTRA)) {
+               swiotlb_init_remap_zone(&mem->zone[SWIOTLB_EX],
+                                       extra_nslabs,
+                                       flags | SWIOTLB_ANY, remap);
+               mem->has_extra = true;
+       }
+
+       mem->late_alloc = false;
+       mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
 
        if (flags & SWIOTLB_VERBOSE)
                swiotlb_print_info();
@@ -385,9 +448,11 @@ void __init swiotlb_init(bool addressing_limit, unsigned 
int flags)
  * This should be just like above, but with some error catching.
  */
 int swiotlb_init_late(size_t size, gfp_t gfp_mask,
-               int (*remap)(void *tlb, unsigned long nslabs))
+               int (*remap)(void *tlb, unsigned long nslabs,
+                            unsigned int flags))
 {
        struct io_tlb_mem *mem = &io_tlb_default_mem;
+       struct io_tlb_zone *zone = &mem->zone[SWIOTLB_DF];
        unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
        unsigned char *vstart = NULL;
        unsigned int order, area_order;
@@ -415,7 +480,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
                return -ENOMEM;
 
        if (remap)
-               rc = remap(vstart, nslabs);
+               rc = remap(vstart, nslabs, 0);
        if (rc) {
                free_pages((unsigned long)vstart, order);
 
@@ -434,28 +499,31 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
        if (!default_nareas)
                swiotlb_adjust_nareas(num_possible_cpus());
 
-       area_order = get_order(array_size(sizeof(*mem->areas),
+       area_order = get_order(array_size(sizeof(*zone->areas),
                default_nareas));
-       mem->areas = (struct io_tlb_area *)
+       zone->areas = (struct io_tlb_area *)
                __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
-       if (!mem->areas)
+       if (!zone->areas)
                goto error_area;
 
-       mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-               get_order(array_size(sizeof(*mem->slots), nslabs)));
-       if (!mem->slots)
+       zone->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+               get_order(array_size(sizeof(*zone->slots), nslabs)));
+       if (!zone->slots)
                goto error_slots;
 
        set_memory_decrypted((unsigned long)vstart,
                             (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
-       swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
-                               default_nareas);
+       swiotlb_init_io_tlb_zone(zone, virt_to_phys(vstart), nslabs,
+                                default_nareas);
+
+       mem->late_alloc = true;
+       mem->force_bounce = swiotlb_force_bounce;
 
        swiotlb_print_info();
        return 0;
 
 error_slots:
-       free_pages((unsigned long)mem->areas, area_order);
+       free_pages((unsigned long)zone->areas, area_order);
 error_area:
        free_pages((unsigned long)vstart, order);
        return -ENOMEM;
@@ -464,33 +532,40 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 void __init swiotlb_exit(void)
 {
        struct io_tlb_mem *mem = &io_tlb_default_mem;
+       int nr_zone = mem->has_extra ? SWIOTLB_NR : SWIOTLB_NR - 1;
        unsigned long tbl_vaddr;
        size_t tbl_size, slots_size;
        unsigned int area_order;
+       int i;
 
        if (swiotlb_force_bounce)
                return;
 
-       if (!mem->nslabs)
-               return;
+       for (i = 0 ; i < nr_zone ; i++) {
+               struct io_tlb_zone *zone = &mem->zone[i];
 
-       pr_info("tearing down default memory pool\n");
-       tbl_vaddr = (unsigned long)phys_to_virt(mem->start);
-       tbl_size = PAGE_ALIGN(mem->end - mem->start);
-       slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
-
-       set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
-       if (mem->late_alloc) {
-               area_order = get_order(array_size(sizeof(*mem->areas),
-                       mem->nareas));
-               free_pages((unsigned long)mem->areas, area_order);
-               free_pages(tbl_vaddr, get_order(tbl_size));
-               free_pages((unsigned long)mem->slots, get_order(slots_size));
-       } else {
-               memblock_free_late(__pa(mem->areas),
-                       array_size(sizeof(*mem->areas), mem->nareas));
-               memblock_free_late(mem->start, tbl_size);
-               memblock_free_late(__pa(mem->slots), slots_size);
+               if (!zone->nslabs)
+                       continue;
+
+               pr_info("tearing down %s memory pool\n",
+                       i == 0 ? "default" : "extra");
+               tbl_vaddr = (unsigned long)phys_to_virt(zone->start);
+               tbl_size = PAGE_ALIGN(zone->end - zone->start);
+               slots_size = PAGE_ALIGN(array_size(sizeof(*zone->slots), 
zone->nslabs));
+
+               set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
+               if (mem->late_alloc) {
+                       area_order = get_order(array_size(sizeof(*zone->areas),
+                                              zone->nareas));
+                       free_pages((unsigned long)zone->areas, area_order);
+                       free_pages(tbl_vaddr, get_order(tbl_size));
+                       free_pages((unsigned long)zone->slots, 
get_order(slots_size));
+               } else {
+                       memblock_free_late(__pa(zone->areas),
+                               array_size(sizeof(*zone->areas), zone->nareas));
+                       memblock_free_late(zone->start, tbl_size);
+                       memblock_free_late(__pa(zone->slots), slots_size);
+               }
        }
 
        memset(mem, 0, sizeof(*mem));
@@ -507,15 +582,15 @@ static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
 /*
  * Bounce: copy the swiotlb buffer from or back to the original dma location
  */
-static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
+static void swiotlb_bounce(struct device *dev, struct io_tlb_zone *zone,
+                          phys_addr_t tlb_addr, size_t size,
                           enum dma_data_direction dir)
 {
-       struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-       int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
-       phys_addr_t orig_addr = mem->slots[index].orig_addr;
-       size_t alloc_size = mem->slots[index].alloc_size;
+       int index = (tlb_addr - zone->start) >> IO_TLB_SHIFT;
+       phys_addr_t orig_addr = zone->slots[index].orig_addr;
+       size_t alloc_size = zone->slots[index].alloc_size;
        unsigned long pfn = PFN_DOWN(orig_addr);
-       unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
+       unsigned char *vaddr = zone->vaddr + tlb_addr - zone->start;
        unsigned int tlb_offset, orig_addr_offset;
 
        if (orig_addr == INVALID_PHYS_ADDR)
@@ -579,7 +654,10 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
        }
 }
 
-#define slot_addr(start, idx)  ((start) + ((idx) << IO_TLB_SHIFT))
+static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx)
+{
+       return start + (idx << IO_TLB_SHIFT);
+}
 
 /*
  * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
@@ -591,9 +669,10 @@ static inline unsigned long get_max_slots(unsigned long 
boundary_mask)
        return nr_slots(boundary_mask + 1);
 }
 
-static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
+static unsigned int wrap_area_index(struct io_tlb_zone *zone,
+                                   unsigned int index)
 {
-       if (index >= mem->area_nslabs)
+       if (index >= zone->area_nslabs)
                return 0;
        return index;
 }
@@ -602,15 +681,15 @@ static unsigned int wrap_area_index(struct io_tlb_mem 
*mem, unsigned int index)
  * Find a suitable number of IO TLB entries size that will fit this request and
  * allocate a buffer from that IO TLB pool.
  */
-static int swiotlb_do_find_slots(struct device *dev, int area_index,
+static int swiotlb_do_find_slots(struct device *dev,
+               struct io_tlb_zone *zone, int area_index,
                phys_addr_t orig_addr, size_t alloc_size,
                unsigned int alloc_align_mask)
 {
-       struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-       struct io_tlb_area *area = mem->areas + area_index;
+       struct io_tlb_area *area = zone->areas + area_index;
        unsigned long boundary_mask = dma_get_seg_boundary(dev);
        dma_addr_t tbl_dma_addr =
-               phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
+               phys_to_dma_unencrypted(dev, zone->start) & boundary_mask;
        unsigned long max_slots = get_max_slots(boundary_mask);
        unsigned int iotlb_align_mask =
                dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
@@ -622,7 +701,7 @@ static int swiotlb_do_find_slots(struct device *dev, int 
area_index,
        unsigned int slot_index;
 
        BUG_ON(!nslots);
-       BUG_ON(area_index >= mem->nareas);
+       BUG_ON(area_index >= zone->nareas);
 
        /*
         * For mappings with an alignment requirement don't bother looping to
@@ -635,11 +714,11 @@ static int swiotlb_do_find_slots(struct device *dev, int 
area_index,
        stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
        spin_lock_irqsave(&area->lock, flags);
-       if (unlikely(nslots > mem->area_nslabs - area->used))
+       if (unlikely(nslots > zone->area_nslabs - area->used))
                goto not_found;
 
-       slot_base = area_index * mem->area_nslabs;
-       index = wrap = wrap_area_index(mem, ALIGN(area->index, stride));
+       slot_base = area_index * zone->area_nslabs;
+       index = wrap = wrap_area_index(zone, ALIGN(area->index, stride));
 
        do {
                slot_index = slot_base + index;
@@ -647,7 +726,7 @@ static int swiotlb_do_find_slots(struct device *dev, int 
area_index,
                if (orig_addr &&
                    (slot_addr(tbl_dma_addr, slot_index) &
                     iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
-                       index = wrap_area_index(mem, index + 1);
+                       index = wrap_area_index(zone, index + 1);
                        continue;
                }
 
@@ -659,10 +738,10 @@ static int swiotlb_do_find_slots(struct device *dev, int 
area_index,
                if (!iommu_is_span_boundary(slot_index, nslots,
                                            nr_slots(tbl_dma_addr),
                                            max_slots)) {
-                       if (mem->slots[slot_index].list >= nslots)
+                       if (zone->slots[slot_index].list >= nslots)
                                goto found;
                }
-               index = wrap_area_index(mem, index + stride);
+               index = wrap_area_index(zone, index + stride);
        } while (index != wrap);
 
 not_found:
@@ -671,19 +750,19 @@ static int swiotlb_do_find_slots(struct device *dev, int 
area_index,
 
 found:
        for (i = slot_index; i < slot_index + nslots; i++) {
-               mem->slots[i].list = 0;
-               mem->slots[i].alloc_size = alloc_size - (offset +
+               zone->slots[i].list = 0;
+               zone->slots[i].alloc_size = alloc_size - (offset +
                                ((i - slot_index) << IO_TLB_SHIFT));
        }
        for (i = slot_index - 1;
             io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
-            mem->slots[i].list; i--)
-               mem->slots[i].list = ++count;
+            zone->slots[i].list; i--)
+               zone->slots[i].list = ++count;
 
        /*
         * Update the indices to avoid searching in the next round.
         */
-       if (index + nslots < mem->area_nslabs)
+       if (index + nslots < zone->area_nslabs)
                area->index = index + nslots;
        else
                area->index = 0;
@@ -692,32 +771,33 @@ static int swiotlb_do_find_slots(struct device *dev, int 
area_index,
        return slot_index;
 }
 
-static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+static int swiotlb_find_slots(struct device *dev,
+               struct io_tlb_zone *zone, phys_addr_t orig_addr,
                size_t alloc_size, unsigned int alloc_align_mask)
 {
-       struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-       int start = raw_smp_processor_id() & (mem->nareas - 1);
+       int start = raw_smp_processor_id() & (zone->nareas - 1);
        int i = start, index;
 
        do {
-               index = swiotlb_do_find_slots(dev, i, orig_addr, alloc_size,
+               index = swiotlb_do_find_slots(dev, zone, i,
+                                             orig_addr, alloc_size,
                                              alloc_align_mask);
                if (index >= 0)
                        return index;
-               if (++i >= mem->nareas)
+               if (++i >= zone->nareas)
                        i = 0;
        } while (i != start);
 
        return -1;
 }
 
-static unsigned long mem_used(struct io_tlb_mem *mem)
+static unsigned long mem_used(struct io_tlb_zone *zone)
 {
        int i;
        unsigned long used = 0;
 
-       for (i = 0; i < mem->nareas; i++)
-               used += mem->areas[i].used;
+       for (i = 0; i < zone->nareas; i++)
+               used += zone->areas[i].used;
        return used;
 }
 
@@ -728,11 +808,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
 {
        struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
        unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+       struct io_tlb_zone *zone;
        unsigned int i;
        int index;
        phys_addr_t tlb_addr;
+       u64 dma_mask;   /* must be u64: 'unsigned long' is 32 bits on 32-bit archs, so comparing against DMA_BIT_MASK(64) would always fail there */
 
-       if (!mem || !mem->nslabs)
+       if (!mem || !mem->zone[SWIOTLB_DF].nslabs)
                panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
 
        if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
@@ -744,13 +826,20 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
                return (phys_addr_t)DMA_MAPPING_ERROR;
        }
 
-       index = swiotlb_find_slots(dev, orig_addr,
+       dma_mask = min_not_zero(*dev->dma_mask, dev->bus_dma_limit);
+       if (mem->has_extra && mem->zone[SWIOTLB_EX].nslabs &&
+           dma_mask == DMA_BIT_MASK(64))
+               zone = &mem->zone[SWIOTLB_EX];
+       else
+               zone = &mem->zone[SWIOTLB_DF];
+
+       index = swiotlb_find_slots(dev, zone, orig_addr,
                                   alloc_size + offset, alloc_align_mask);
        if (index == -1) {
                if (!(attrs & DMA_ATTR_NO_WARN))
                        dev_warn_ratelimited(dev,
        "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-                                alloc_size, mem->nslabs, mem_used(mem));
+                                alloc_size, zone->nslabs, mem_used(zone));
                return (phys_addr_t)DMA_MAPPING_ERROR;
        }
 
@@ -760,8 +849,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
         * needed.
         */
        for (i = 0; i < nr_slots(alloc_size + offset); i++)
-               mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
-       tlb_addr = slot_addr(mem->start, index) + offset;
+               zone->slots[index + i].orig_addr = slot_addr(orig_addr, i);
+       tlb_addr = slot_addr(zone->start, index) + offset;
        /*
         * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
         * to the tlb buffer, if we knew for sure the device will
@@ -769,19 +858,19 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, 
phys_addr_t orig_addr,
         * unconditional bounce may prevent leaking swiotlb content (i.e.
         * kernel memory) to user-space.
         */
-       swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+       swiotlb_bounce(dev, zone, tlb_addr, mapping_size, DMA_TO_DEVICE);
        return tlb_addr;
 }
 
-static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
+static void swiotlb_release_slots(struct device *dev,
+               struct io_tlb_zone *zone, phys_addr_t tlb_addr)
 {
-       struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
        unsigned long flags;
        unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
-       int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
-       int nslots = nr_slots(mem->slots[index].alloc_size + offset);
-       int aindex = index / mem->area_nslabs;
-       struct io_tlb_area *area = &mem->areas[aindex];
+       int index = (tlb_addr - offset - zone->start) >> IO_TLB_SHIFT;
+       int nslots = nr_slots(zone->slots[index].alloc_size + offset);
+       int aindex = index / zone->area_nslabs;
+       struct io_tlb_area *area = &zone->areas[aindex];
        int count, i;
 
        /*
@@ -790,11 +879,11 @@ static void swiotlb_release_slots(struct device *dev, 
phys_addr_t tlb_addr)
         * While returning the entries to the free list, we merge the entries
         * with slots below and above the pool being returned.
         */
-       BUG_ON(aindex >= mem->nareas);
+       BUG_ON(aindex >= zone->nareas);
 
        spin_lock_irqsave(&area->lock, flags);
        if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
-               count = mem->slots[index + nslots].list;
+               count = zone->slots[index + nslots].list;
        else
                count = 0;
 
@@ -803,9 +892,9 @@ static void swiotlb_release_slots(struct device *dev, 
phys_addr_t tlb_addr)
         * superceeding slots
         */
        for (i = index + nslots - 1; i >= index; i--) {
-               mem->slots[i].list = ++count;
-               mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
-               mem->slots[i].alloc_size = 0;
+               zone->slots[i].list = ++count;
+               zone->slots[i].orig_addr = INVALID_PHYS_ADDR;
+               zone->slots[i].alloc_size = 0;
        }
 
        /*
@@ -813,9 +902,9 @@ static void swiotlb_release_slots(struct device *dev, 
phys_addr_t tlb_addr)
         * available (non zero)
         */
        for (i = index - 1;
-            io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
+            io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && zone->slots[i].list;
             i--)
-               mem->slots[i].list = ++count;
+               zone->slots[i].list = ++count;
        area->used -= nslots;
        spin_unlock_irqrestore(&area->lock, flags);
 }
@@ -827,21 +916,30 @@ void swiotlb_tbl_unmap_single(struct device *dev, 
phys_addr_t tlb_addr,
                              size_t mapping_size, enum dma_data_direction dir,
                              unsigned long attrs)
 {
+       struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+       int zone_index = swiotlb_zone_index(mem, tlb_addr);
+       struct io_tlb_zone *zone = &mem->zone[zone_index];
+
        /*
         * First, sync the memory before unmapping the entry
         */
        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
            (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-               swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+               swiotlb_bounce(dev, zone, tlb_addr, mapping_size,
+                              DMA_FROM_DEVICE);
 
-       swiotlb_release_slots(dev, tlb_addr);
+       swiotlb_release_slots(dev, zone, tlb_addr);
 }
 
 void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
                size_t size, enum dma_data_direction dir)
 {
+       struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+       int zone_index = swiotlb_zone_index(mem, tlb_addr);
+       struct io_tlb_zone *zone = &mem->zone[zone_index];
+
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-               swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+               swiotlb_bounce(dev, zone, tlb_addr, size, DMA_TO_DEVICE);
        else
                BUG_ON(dir != DMA_FROM_DEVICE);
 }
@@ -849,8 +947,12 @@ void swiotlb_sync_single_for_device(struct device *dev, 
phys_addr_t tlb_addr,
 void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
                size_t size, enum dma_data_direction dir)
 {
+       struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+       int zone_index = swiotlb_zone_index(mem, tlb_addr);
+       struct io_tlb_zone *zone = &mem->zone[zone_index];
+
        if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
-               swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE);
+               swiotlb_bounce(dev, zone, tlb_addr, size, DMA_FROM_DEVICE);
        else
                BUG_ON(dir != DMA_TO_DEVICE);
 }
@@ -908,13 +1010,14 @@ bool is_swiotlb_active(struct device *dev)
 {
        struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 
-       return mem && mem->nslabs;
+       return mem && mem->zone[SWIOTLB_DF].nslabs;
 }
 EXPORT_SYMBOL_GPL(is_swiotlb_active);
 
 static int io_tlb_used_get(void *data, u64 *val)
 {
-       *val = mem_used(&io_tlb_default_mem);
+       struct io_tlb_zone *zone = (struct io_tlb_zone *) data;
+       *val = mem_used(zone);
        return 0;
 }
 DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
@@ -922,13 +1025,26 @@ static int io_tlb_used_get(void *data, u64 *val)
 static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
                                         const char *dirname)
 {
+       struct io_tlb_zone *zone;
+
        mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
-       if (!mem->nslabs)
-               return;
 
-       debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
-       debugfs_create_file("io_tlb_used", 0400, mem->debugfs, NULL,
-                       &fops_io_tlb_used);
+       zone = &mem->zone[SWIOTLB_DF];
+       if (zone->nslabs) {
+               debugfs_create_ulong("io_tlb_nslabs", 0400,
+                                   mem->debugfs, &zone->nslabs);
+               debugfs_create_file("io_tlb_used", 0400, mem->debugfs,
+                                   zone, &fops_io_tlb_used);
+       }
+
+       if (mem->has_extra) {
+               zone = &mem->zone[SWIOTLB_EX];
+
+               debugfs_create_ulong("io_tlb_nslabs_extra", 0400,
+                                    mem->debugfs, &zone->nslabs);
+               debugfs_create_file("io_tlb_used_extra", 0400,
+                                   mem->debugfs, zone, &fops_io_tlb_used);
+       }
 }
 
 static int __init __maybe_unused swiotlb_create_default_debugfs(void)
@@ -946,17 +1062,20 @@ static int __init __maybe_unused 
swiotlb_create_default_debugfs(void)
 struct page *swiotlb_alloc(struct device *dev, size_t size)
 {
        struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+       struct io_tlb_zone *zone;
        phys_addr_t tlb_addr;
        int index;
 
        if (!mem)
                return NULL;
 
-       index = swiotlb_find_slots(dev, 0, size, 0);
+       zone = &mem->zone[SWIOTLB_DF];
+
+       index = swiotlb_find_slots(dev, zone, 0, size, 0);
        if (index == -1)
                return NULL;
 
-       tlb_addr = slot_addr(mem->start, index);
+       tlb_addr = slot_addr(zone->start, index);
 
        return pfn_to_page(PFN_DOWN(tlb_addr));
 }
@@ -964,11 +1083,14 @@ struct page *swiotlb_alloc(struct device *dev, size_t 
size)
 bool swiotlb_free(struct device *dev, struct page *page, size_t size)
 {
        phys_addr_t tlb_addr = page_to_phys(page);
+       struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+       int zone_index = swiotlb_zone_index(mem, tlb_addr);
+       struct io_tlb_zone *zone = &mem->zone[zone_index];
 
        if (!is_swiotlb_buffer(dev, tlb_addr))
                return false;
 
-       swiotlb_release_slots(dev, tlb_addr);
+       swiotlb_release_slots(dev, zone, tlb_addr);
 
        return true;
 }
@@ -977,6 +1099,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem 
*rmem,
                                    struct device *dev)
 {
        struct io_tlb_mem *mem = rmem->priv;
+       struct io_tlb_zone *zone;
        unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
 
        /* Set Per-device io tlb area to one */
@@ -992,25 +1115,29 @@ static int rmem_swiotlb_device_init(struct reserved_mem 
*rmem,
                if (!mem)
                        return -ENOMEM;
 
-               mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
-               if (!mem->slots) {
+               zone = &mem->zone[SWIOTLB_DF];
+
+               zone->slots = kcalloc(nslabs, sizeof(*zone->slots), GFP_KERNEL);
+               if (!zone->slots) {
                        kfree(mem);
                        return -ENOMEM;
                }
 
-               mem->areas = kcalloc(nareas, sizeof(*mem->areas),
+               zone->areas = kcalloc(nareas, sizeof(*zone->areas),
                                GFP_KERNEL);
-               if (!mem->areas) {
-                       kfree(mem->slots);
+               if (!zone->areas) {
+                       kfree(zone->slots);
                        kfree(mem);
                        return -ENOMEM;
                }
 
                set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
                                     rmem->size >> PAGE_SHIFT);
-               swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
-                                       false, nareas);
+               swiotlb_init_io_tlb_zone(zone, rmem->base, nslabs, nareas);
                mem->for_alloc = true;
+               mem->has_extra = false;
+               mem->late_alloc = false;
+               mem->force_bounce = true;       /* 'flags' is not in scope here; the replaced call passed SWIOTLB_FORCE unconditionally */
 
                rmem->priv = mem;
 
-- 
1.8.3.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.