[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 13/15] drm/amdgpu: Use mmu_range_insert instead of hmm_mirror



From: Jason Gunthorpe <jgg@xxxxxxxxxxxx>

Remove the interval tree in the driver and rely on the tree maintained by
the mmu_notifier for delivering mmu_notifier invalidation callbacks.

For some reason amdgpu has a very complicated arrangement where it tries
to prevent duplicate entries in the interval_tree, this is not necessary,
each amdgpu_bo can be its own stand alone entry. interval_tree already
allows duplicates and overlaps in the tree.

Also, there is no need to remove entries upon a release callback, the
mmu_range API safely allows objects to remain registered beyond the
lifetime of the mm. The driver only has to stop touching the pages during
release.

Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Christian König <christian.koenig@xxxxxxx>
Cc: David (ChunMing) Zhou <David1.Zhou@xxxxxxx>
Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   2 +
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        | 341 ++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h    |  13 +-
 6 files changed, 84 insertions(+), 282 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bd37df5dd6d048..60591a5d420021 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1006,6 +1006,8 @@ struct amdgpu_device {
        struct mutex  lock_reset;
        struct amdgpu_doorbell_index doorbell_index;
 
+       struct mutex                    notifier_lock;
+
        int asic_reset_res;
        struct work_struct              xgmi_reset_work;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6d021ecc8d598f..47700302a08b7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -481,8 +481,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem 
*mem,
  *
  * Returns 0 for success, negative errno for errors.
  */
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
-                          uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
 {
        struct amdkfd_process_info *process_info = mem->process_info;
        struct amdgpu_bo *bo = mem->bo;
@@ -1195,7 +1194,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 
        if (user_addr) {
-               ret = init_user_pages(*mem, current->mm, user_addr);
+               ret = init_user_pages(*mem, user_addr);
                if (ret)
                        goto allocate_init_user_pages_failed;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5a1939dbd4e3e6..38f97998aaddb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2633,6 +2633,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        mutex_init(&adev->virt.vf_errors.lock);
        hash_init(adev->mn_hash);
        mutex_init(&adev->lock_reset);
+       mutex_init(&adev->notifier_lock);
        mutex_init(&adev->virt.dpm_mutex);
        mutex_init(&adev->psp.mutex);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 31d4deb5d29484..4ffd7b90f4d907 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -50,66 +50,6 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 
-/**
- * struct amdgpu_mn_node
- *
- * @it: interval node defining start-last of the affected address range
- * @bos: list of all BOs in the affected address range
- *
- * Manages all BOs which are affected of a certain range of address space.
- */
-struct amdgpu_mn_node {
-       struct interval_tree_node       it;
-       struct list_head                bos;
-};
-
-/**
- * amdgpu_mn_destroy - destroy the HMM mirror
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void amdgpu_mn_destroy(struct work_struct *work)
-{
-       struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
-       struct amdgpu_device *adev = amn->adev;
-       struct amdgpu_mn_node *node, *next_node;
-       struct amdgpu_bo *bo, *next_bo;
-
-       mutex_lock(&adev->mn_lock);
-       down_write(&amn->lock);
-       hash_del(&amn->node);
-       rbtree_postorder_for_each_entry_safe(node, next_node,
-                                            &amn->objects.rb_root, it.rb) {
-               list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
-                       bo->mn = NULL;
-                       list_del_init(&bo->mn_list);
-               }
-               kfree(node);
-       }
-       up_write(&amn->lock);
-       mutex_unlock(&adev->mn_lock);
-
-       hmm_mirror_unregister(&amn->mirror);
-       kfree(amn);
-}
-
-/**
- * amdgpu_hmm_mirror_release - callback to notify about mm destruction
- *
- * @mirror: the HMM mirror (mm) this callback is about
- *
- * Shedule a work item to lazy destroy HMM mirror.
- */
-static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
-{
-       struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-
-       INIT_WORK(&amn->work, amdgpu_mn_destroy);
-       schedule_work(&amn->work);
-}
-
 /**
  * amdgpu_mn_lock - take the write side lock for this notifier
  *
@@ -133,157 +73,86 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
 }
 
 /**
- * amdgpu_mn_read_lock - take the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
-{
-       if (blockable)
-               down_read(&amn->lock);
-       else if (!down_read_trylock(&amn->lock))
-               return -EAGAIN;
-
-       return 0;
-}
-
-/**
- * amdgpu_mn_read_unlock - drop the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
-{
-       up_read(&amn->lock);
-}
-
-/**
- * amdgpu_mn_invalidate_node - unmap all BOs of a node
+ * amdgpu_mn_invalidate_gfx - callback to notify about mm change
  *
- * @node: the node with the BOs to unmap
- * @start: start of address range affected
- * @end: end of address range affected
+ * @mrn: the range (mm) is about to update
+ * @range: details on the invalidation
  *
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
-                                     unsigned long start,
-                                     unsigned long end)
+static bool amdgpu_mn_invalidate_gfx(struct mmu_range_notifier *mrn,
+                                    const struct mmu_notifier_range *range)
 {
-       struct amdgpu_bo *bo;
+       struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier);
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        long r;
 
-       list_for_each_entry(bo, &node->bos, mn_list) {
-
-               if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
-                       continue;
-
-               r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
-                       true, false, MAX_SCHEDULE_TIMEOUT);
-               if (r <= 0)
-                       DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-       }
+       /* FIXME: Is this necessary? */
+       if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start,
+                                         range->end))
+               return true;
+
+       if (!mmu_notifier_range_blockable(range))
+               return false;
+
+       mutex_lock(&adev->notifier_lock);
+       r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
+                                     MAX_SCHEDULE_TIMEOUT);
+       mutex_unlock(&adev->notifier_lock);
+       if (r <= 0)
+               DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+       return true;
 }
 
+static const struct mmu_range_notifier_ops amdgpu_mn_gfx_ops = {
+       .invalidate = amdgpu_mn_invalidate_gfx,
+};
+
 /**
- * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
+ * amdgpu_mn_invalidate_hsa - callback to notify about mm change
  *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
+ * @mrn: the range (mm) is about to update
+ * @range: details on the invalidation
  *
- * Block for operations on BOs to finish and mark pages as accessed and
- * potentially dirty.
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
  */
-static int
-amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
-                             const struct mmu_notifier_range *update)
+static bool amdgpu_mn_invalidate_hsa(struct mmu_range_notifier *mrn,
+                                    const struct mmu_notifier_range *range)
 {
-       struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-       unsigned long start = update->start;
-       unsigned long end = update->end;
-       bool blockable = mmu_notifier_range_blockable(update);
-       struct interval_tree_node *it;
-
-       /* notification is exclusive, but interval is inclusive */
-       end -= 1;
-
-       /* TODO we should be able to split locking for interval tree and
-        * amdgpu_mn_invalidate_node
-        */
-       if (amdgpu_mn_read_lock(amn, blockable))
-               return -EAGAIN;
-
-       it = interval_tree_iter_first(&amn->objects, start, end);
-       while (it) {
-               struct amdgpu_mn_node *node;
-
-               if (!blockable) {
-                       amdgpu_mn_read_unlock(amn);
-                       return -EAGAIN;
-               }
+       struct amdgpu_bo *bo = container_of(mrn, struct amdgpu_bo, notifier);
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 
-               node = container_of(it, struct amdgpu_mn_node, it);
-               it = interval_tree_iter_next(it, start, end);
+       /* FIXME: Is this necessary? */
+       if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, range->start,
+                                         range->end))
+               return true;
 
-               amdgpu_mn_invalidate_node(node, start, end);
-       }
+       if (!mmu_notifier_range_blockable(range))
+               return false;
 
-       amdgpu_mn_read_unlock(amn);
+       mutex_lock(&adev->notifier_lock);
+       amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
+       mutex_unlock(&adev->notifier_lock);
 
-       return 0;
+       return true;
 }
 
-/**
- * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * We temporarily evict all BOs between start and end. This
- * necessitates evicting all user-mode queues of the process. The BOs
- * are restorted in amdgpu_mn_invalidate_range_end_hsa.
- */
-static int
-amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
-                             const struct mmu_notifier_range *update)
+static const struct mmu_range_notifier_ops amdgpu_mn_hsa_ops = {
+       .invalidate = amdgpu_mn_invalidate_hsa,
+};
+
+static int amdgpu_mn_sync_pagetables(struct hmm_mirror *mirror,
+                                    const struct mmu_notifier_range *update)
 {
        struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-       unsigned long start = update->start;
-       unsigned long end = update->end;
-       bool blockable = mmu_notifier_range_blockable(update);
-       struct interval_tree_node *it;
 
-       /* notification is exclusive, but interval is inclusive */
-       end -= 1;
-
-       if (amdgpu_mn_read_lock(amn, blockable))
-               return -EAGAIN;
-
-       it = interval_tree_iter_first(&amn->objects, start, end);
-       while (it) {
-               struct amdgpu_mn_node *node;
-               struct amdgpu_bo *bo;
-
-               if (!blockable) {
-                       amdgpu_mn_read_unlock(amn);
-                       return -EAGAIN;
-               }
-
-               node = container_of(it, struct amdgpu_mn_node, it);
-               it = interval_tree_iter_next(it, start, end);
-
-               list_for_each_entry(bo, &node->bos, mn_list) {
-                       struct kgd_mem *mem = bo->kfd_bo;
-
-                       if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
-                                                        start, end))
-                               amdgpu_amdkfd_evict_userptr(mem, amn->mm);
-               }
-       }
-
-       amdgpu_mn_read_unlock(amn);
+       if (!mmu_notifier_range_blockable(update))
+               return false;
 
+       down_read(&amn->lock);
+       up_read(&amn->lock);
        return 0;
 }
 
@@ -295,12 +164,10 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
 
 static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
        [AMDGPU_MN_TYPE_GFX] = {
-               .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
-               .release = amdgpu_hmm_mirror_release
+               .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables,
        },
        [AMDGPU_MN_TYPE_HSA] = {
-               .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
-               .release = amdgpu_hmm_mirror_release
+               .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables,
        },
 };
 
@@ -327,7 +194,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
        }
 
        hash_for_each_possible(adev->mn_hash, amn, node, key)
-               if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
+               if (AMDGPU_MN_KEY(amn->mirror.hmm->mmu_notifier.mm,
+                                 amn->type) == key)
                        goto release_locks;
 
        amn = kzalloc(sizeof(*amn), GFP_KERNEL);
@@ -337,10 +205,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
        }
 
        amn->adev = adev;
-       amn->mm = mm;
        init_rwsem(&amn->lock);
        amn->type = type;
-       amn->objects = RB_ROOT_CACHED;
 
        amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
        r = hmm_mirror_register(&amn->mirror, mm);
@@ -369,100 +235,33 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device 
*adev,
  * @bo: amdgpu buffer object
  * @addr: userptr addr we should monitor
  *
- * Registers an HMM mirror for the given BO at the specified address.
+ * Registers a mmu_notifier for the given BO at the specified address.
  * Returns 0 on success, -ERRNO if anything goes wrong.
  */
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
-       unsigned long end = addr + amdgpu_bo_size(bo) - 1;
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       enum amdgpu_mn_type type =
-               bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
-       struct amdgpu_mn *amn;
-       struct amdgpu_mn_node *node = NULL, *new_node;
-       struct list_head bos;
-       struct interval_tree_node *it;
-
-       amn = amdgpu_mn_get(adev, type);
-       if (IS_ERR(amn))
-               return PTR_ERR(amn);
-
-       new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
-       if (!new_node)
-               return -ENOMEM;
-
-       INIT_LIST_HEAD(&bos);
-
-       down_write(&amn->lock);
-
-       while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
-               kfree(node);
-               node = container_of(it, struct amdgpu_mn_node, it);
-               interval_tree_remove(&node->it, &amn->objects);
-               addr = min(it->start, addr);
-               end = max(it->last, end);
-               list_splice(&node->bos, &bos);
-       }
-
-       if (!node)
-               node = new_node;
+       if (bo->kfd_bo)
+               bo->notifier.ops = &amdgpu_mn_hsa_ops;
        else
-               kfree(new_node);
-
-       bo->mn = amn;
-
-       node->it.start = addr;
-       node->it.last = end;
-       INIT_LIST_HEAD(&node->bos);
-       list_splice(&bos, &node->bos);
-       list_add(&bo->mn_list, &node->bos);
+               bo->notifier.ops = &amdgpu_mn_gfx_ops;
 
-       interval_tree_insert(&node->it, &amn->objects);
-
-       up_write(&amn->lock);
-
-       return 0;
+       return mmu_range_notifier_insert(&bo->notifier, addr,
+                                        amdgpu_bo_size(bo), current->mm);
 }
 
 /**
- * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
+ * amdgpu_mn_unregister - unregister a BO for notifier updates
  *
  * @bo: amdgpu buffer object
  *
- * Remove any registration of HMM mirror updates from the buffer object.
+ * Remove any registration of mmu notifier updates from the buffer object.
  */
 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 {
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       struct amdgpu_mn *amn;
-       struct list_head *head;
-
-       mutex_lock(&adev->mn_lock);
-
-       amn = bo->mn;
-       if (amn == NULL) {
-               mutex_unlock(&adev->mn_lock);
+       if (!bo->notifier.mm)
                return;
-       }
-
-       down_write(&amn->lock);
-
-       /* save the next list entry for later */
-       head = bo->mn_list.next;
-
-       bo->mn = NULL;
-       list_del_init(&bo->mn_list);
-
-       if (list_empty(head)) {
-               struct amdgpu_mn_node *node;
-
-               node = container_of(head, struct amdgpu_mn_node, bos);
-               interval_tree_remove(&node->it, &amn->objects);
-               kfree(node);
-       }
-
-       up_write(&amn->lock);
-       mutex_unlock(&adev->mn_lock);
+       mmu_range_notifier_remove(&bo->notifier);
+       bo->notifier.mm = NULL;
 }
 
 /* flags used by HMM internal, not related to CPU/GPU PTE flags */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index b8ed68943625c2..d73ab2947b22b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -39,12 +39,10 @@ enum amdgpu_mn_type {
  * struct amdgpu_mn
  *
  * @adev: amdgpu device pointer
- * @mm: process address space
  * @type: type of MMU notifier
  * @work: destruction work item
  * @node: hash table node to find structure by adev and mn
  * @lock: rw semaphore protecting the notifier nodes
- * @objects: interval tree containing amdgpu_mn_nodes
  * @mirror: HMM mirror function support
  *
  * Data for each amdgpu device and process address space.
@@ -52,7 +50,6 @@ enum amdgpu_mn_type {
 struct amdgpu_mn {
        /* constant after initialisation */
        struct amdgpu_device    *adev;
-       struct mm_struct        *mm;
        enum amdgpu_mn_type     type;
 
        /* only used on destruction */
@@ -63,7 +60,6 @@ struct amdgpu_mn {
 
        /* objects protected by lock */
        struct rw_semaphore     lock;
-       struct rb_root_cached   objects;
 
 #ifdef CONFIG_HMM_MIRROR
        /* HMM mirror */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 658f4c9779b704..4b44ab850f94c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -30,6 +30,9 @@
 
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
 
 #define AMDGPU_BO_INVALID_OFFSET       LONG_MAX
 #define AMDGPU_BO_MAX_PLACEMENTS       3
@@ -100,10 +103,12 @@ struct amdgpu_bo {
        struct ttm_bo_kmap_obj          dma_buf_vmap;
        struct amdgpu_mn                *mn;
 
-       union {
-               struct list_head        mn_list;
-               struct list_head        shadow_list;
-       };
+
+#ifdef CONFIG_MMU_NOTIFIER
+       struct mmu_range_notifier       notifier;
+#endif
+
+       struct list_head                shadow_list;
 
        struct kgd_mem                  *kfd_bo;
 };
-- 
2.23.0


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.