powerpc/mm_iommu: Fix potential deadlock
Currently mm_iommu_do_alloc() is called in 2 cases:
- VFIO_IOMMU_SPAPR_REGISTER_MEMORY ioctl() for normal memory;
- vfio_pci_nvgpu_regops::mmap() for GPU memory.

One of the differences here is that mmap() is called with mm::mmap_sem held, while mm_iommu_do_alloc() locks mm::mmap_sem itself (when adjusting locked_vm and when pinning pages), which can potentially cause a deadlock. We have not hit this yet because the mmap() path does not adjust locked_vm and does not pin pages. However with CONFIG_DEBUG_LOCKDEP=y there is an annoying warning (below; it is slightly confusing).

This makes a few changes to reduce the amount of time spent under a lock.

This holds mem_list_mutex only when looking at or changing the mem list. This means the list is now checked twice for the normal memory case - before starting pinning and before adding the item to the list.

This changes mm_iommu_do_alloc() to only allocate and add an iommu memory descriptor (used to deal with both normal and GPU memory in a rather messy way). As a result, mm_iommu_new() and mm_iommu_do_alloc() no longer need to test for (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA), which makes the code simpler.

This moves locked_vm decrementing out from under mem_list_mutex for the same reasons.

This is one of the lockdep warnings:

======================================================
WARNING: possible circular locking dependency detected
5.1.0-rc2-le_nv2_aikATfstn1-p1 #363 Not tainted
------------------------------------------------------
qemu-system-ppc/8038 is trying to acquire lock:
000000002ec6c453 (mem_list_mutex){+.+.}, at: mm_iommu_do_alloc+0x70/0x490

but task is already holding lock:
00000000fd7da97f (&mm->mmap_sem){++++}, at: vm_mmap_pgoff+0xf0/0x160

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&mm->mmap_sem){++++}:
       lock_acquire+0xf8/0x260
       down_write+0x44/0xa0
       mm_iommu_adjust_locked_vm.part.1+0x4c/0x190
       mm_iommu_do_alloc+0x310/0x490
       tce_iommu_ioctl.part.9+0xb84/0x1150 [vfio_iommu_spapr_tce]
       vfio_fops_unl_ioctl+0x94/0x430 [vfio]
       do_vfs_ioctl+0xe4/0x930
       ksys_ioctl+0xc4/0x110
       sys_ioctl+0x28/0x80
       system_call+0x5c/0x70

-> #0 (mem_list_mutex){+.+.}:
       __lock_acquire+0x1484/0x1900
       lock_acquire+0xf8/0x260
       __mutex_lock+0x88/0xa70
       mm_iommu_do_alloc+0x70/0x490
       vfio_pci_nvgpu_mmap+0xc0/0x130 [vfio_pci]
       vfio_pci_mmap+0x198/0x2a0 [vfio_pci]
       vfio_device_fops_mmap+0x44/0x70 [vfio]
       mmap_region+0x5d4/0x770
       do_mmap+0x42c/0x650
       vm_mmap_pgoff+0x124/0x160
       ksys_mmap_pgoff+0xdc/0x2f0
       sys_mmap+0x40/0x80
       system_call+0x5c/0x70

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(mem_list_mutex);
                               lock(&mm->mmap_sem);
  lock(mem_list_mutex);

 *** DEADLOCK ***

1 lock held by qemu-system-ppc/8038:
 #0: 00000000fd7da97f (&mm->mmap_sem){++++}, at: vm_mmap_pgoff+0xf0/0x160

Signed-off-by: Alexey Kardashevskiy <[email protected]>