/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "kfd_iommu.h"

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

static struct workqueue_struct *kfd_process_wq;

static struct kfd_process *find_process(const struct task_struct *thread);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep);
static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);

static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);

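/* kfd_process_wq is used to defer the final release of kfd_process
 * structures to a worker (see kfd_process_ref_release()).
 */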
void kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
}

struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
	struct task_struct *thread = current;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/*
	 * Take the kfd_processes_mutex before starting process creation
	 * so there won't be a case where two threads of the same process
	 * create two kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("Process already found\n");
	else
		process = create_process(thread, filep);

	mutex_unlock(&kfd_processes_mutex);

	return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);

	return process;
}

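/* Must be called under an SRCU read-side lock of kfd_processes_srcu. */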
static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

void kfd_unref_process(struct kfd_process *p)
{
	kref_put(&p->ref, kfd_process_ref_release);
}

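/* Free all per-device data (pdds) of a process, destroying each device's
 * GPUVM context and freeing any CWSR pages that were allocated for it.
 */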
static void kfd_process_destroy_pdds(struct kfd_process *p)
{
	struct kfd_process_device *pdd, *temp;

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
				 per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
				pdd->dev->id, p->pasid);

		if (pdd->vm)
			pdd->dev->kfd2kgd->destroy_process_vm(
				pdd->dev->kgd, pdd->vm);

		list_del(&pdd->per_device_list);

		if (pdd->qpd.cwsr_kaddr)
			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
				get_order(KFD_CWSR_TBA_TMA_SIZE));

		kfree(pdd);
	}
}

/* No process locking is needed in this function, because the process
 * is not findable any more. We must assume that no other thread is
 * using it any more, otherwise we couldn't safely free the process
 * structure in the end.
 */
static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process *p = container_of(work, struct kfd_process,
					     release_work);

	kfd_iommu_unbind_process(p);

	kfd_process_destroy_pdds(p);
	dma_fence_put(p->ef);

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_destroy(&p->mutex);

	put_task_struct(p->lead_thread);

	kfree(p);
}

static void kfd_process_ref_release(struct kref *ref)
{
	struct kfd_process *p = container_of(ref, struct kfd_process, ref);

	INIT_WORK(&p->release_work, kfd_process_wq_release);
	queue_work(kfd_process_wq, &p->release_work);
}

static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);

	kfd_unref_process(p);
}

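/* mmu_notifier release callback: called when the process address space is
 * torn down. Removes the process from the hash table, tears down its debug
 * registration and queues, and drops the KFD reference.
 */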
static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier srcu is read locked.
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	mutex_lock(&p->mutex);

	/* Iterate over all process device data structures and, if a pdd is
	 * in debug mode, force unregistration first so that the queues can
	 * be destroyed.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kfd_dev *dev = pdd->dev;

		mutex_lock(kfd_get_dbgmgr_mutex());
		if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
			if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
				kfd_dbgmgr_destroy(dev->dbgmgr);
				dev->dbgmgr = NULL;
			}
		}
		mutex_unlock(kfd_get_dbgmgr_mutex());
	}

	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mutex_unlock(&p->mutex);

	mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
{
	unsigned long offset;
	struct kfd_process_device *pdd = NULL;
	struct kfd_dev *dev = NULL;
	struct qcm_process_device *qpd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		dev = pdd->dev;
		qpd = &pdd->qpd;
		if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
			continue;
		offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
			MAP_SHARED, offset);

		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
		}

		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);

		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
	}

	return 0;
}

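/* Allocate and initialize a new kfd_process for @thread. Called from
 * kfd_create_process() with kfd_processes_mutex held.
 */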
static struct kfd_process *create_process(const struct task_struct *thread,
					struct file *filep)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);
	if (!process)
		goto err_alloc_process;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	kref_init(&process->ref);
	mutex_init(&process->mutex);
	process->mm = thread->mm;

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;
	get_task_struct(process->lead_thread);

	INIT_LIST_HEAD(&process->per_device_data);

	kfd_event_init_process(process);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	process->is_32bit_user_mode = in_compat_syscall();
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
	process->last_restore_timestamp = get_jiffies_64();

	err = kfd_process_init_cwsr(process, filep);
	if (err)
		goto err_init_cwsr;

	return process;

err_init_cwsr:
	kfd_process_destroy_pdds(process);
err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

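/* Look up the per-device data (pdd) of @p for @dev. Returns NULL if the
 * process has no pdd for this device.
 */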
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

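/* Allocate per-device data for @dev, add it to the process and create the
 * GPUVM context for this device.
 */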
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (!pdd)
		return NULL;

	pdd->dev = dev;
	INIT_LIST_HEAD(&pdd->qpd.queues_list);
	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
	pdd->qpd.dqm = dev->dqm;
	pdd->qpd.pqm = &p->pqm;
	pdd->qpd.evicted = 0;
	pdd->process = p;
	pdd->bound = PDD_UNBOUND;
	pdd->already_dequeued = false;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Create the GPUVM context for this specific device */
	if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
					    &p->kgd_process_info, &p->ef)) {
		pr_err("Failed to create process VM object\n");
		goto err_create_pdd;
	}
	return pdd;

err_create_pdd:
	list_del(&pdd->per_device_list);
	kfree(pdd);
	return NULL;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	err = kfd_iommu_bind_process_to_device(pdd);
	if (err)
		return ERR_PTR(err);

	return pdd;
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p, *ret_p = NULL;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			kref_get(&p->ref);
			ret_p = p;
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return ret_p;
}

/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *p;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	p = find_process_by_mm(mm);
	if (p)
		kref_get(&p->ref);

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

/* process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
 */
static int process_evict_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r = 0;
	unsigned int n_evicted = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
							    &pdd->qpd);
		if (r) {
			pr_err("Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
	}

	return r;

fail:
	/* To keep state consistent, roll back partial eviction by
	 * restoring queues
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if (n_evicted == 0)
			break;
		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");

		n_evicted--;
	}

	return r;
}

/* process_restore_queues - Restore all user queues of a process */
static int process_restore_queues(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int r, ret = 0;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
	}

	return ret;
}

static void evict_process_worker(struct work_struct *work)
{
	int ret;
	struct kfd_process *p;
	struct delayed_work *dwork;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, eviction_work);
	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
		  "Eviction fence mismatch\n");

	/* A narrow window of overlap between the restore and evict work
	 * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
	 * unreserves KFD BOs, the process can be evicted again, but restore
	 * still has a few more steps to finish. So wait for any previous
	 * restore work to complete.
	 */
	flush_delayed_work(&p->restore_work);

	pr_debug("Started evicting pasid %d\n", p->pasid);
	ret = process_evict_queues(p);
	if (!ret) {
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
		schedule_delayed_work(&p->restore_work,
				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));

		pr_debug("Finished evicting pasid %d\n", p->pasid);
	} else
		pr_err("Failed to evict queues of pasid %d\n", p->pasid);
}

static void restore_process_worker(struct work_struct *work)
{
	struct delayed_work *dwork;
	struct kfd_process *p;
	struct kfd_process_device *pdd;
	int ret = 0;

	dwork = to_delayed_work(work);

	/* Process termination destroys this worker thread. So during the
	 * lifetime of this thread, kfd_process p will be valid
	 */
	p = container_of(dwork, struct kfd_process, restore_work);

	/* Call restore_process_bos on the first KGD device. This function
	 * takes care of restoring the whole process including other devices.
	 * Restore can fail if not enough memory is available. If so,
	 * reschedule again.
	 */
	pdd = list_first_entry(&p->per_device_data,
			       struct kfd_process_device,
			       per_device_list);

	pr_debug("Started restoring pasid %d\n", p->pasid);

	/* Set last_restore_timestamp before the restore attempt. Otherwise
	 * it would have to be set by KGD (restore_process_bos) before KFD
	 * BOs are unreserved. If not, the process can be evicted again
	 * before the timestamp is set.
	 * If restore fails, the timestamp will be set again in the next
	 * attempt. This would mean that the minimum GPU quanta would be
	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
	 * functions).
	 */

	p->last_restore_timestamp = get_jiffies_64();
	ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
						     &p->ef);
	if (ret) {
		pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
		ret = schedule_delayed_work(&p->restore_work,
				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
		WARN(!ret, "reschedule restore work failed\n");
		return;
	}

	ret = process_restore_queues(p);
	if (!ret)
		pr_debug("Finished restoring pasid %d\n", p->pasid);
	else
		pr_err("Failed to restore queues of pasid %d\n", p->pasid);
}

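/* Cancel pending eviction/restore work and evict the queues of every known
 * process, signalling and dropping their eviction fences.
 */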
void kfd_suspend_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		cancel_delayed_work_sync(&p->eviction_work);
		cancel_delayed_work_sync(&p->restore_work);

		if (process_evict_queues(p))
			pr_err("Failed to suspend process %d\n", p->pasid);
		dma_fence_signal(p->ef);
		dma_fence_put(p->ef);
		p->ef = NULL;
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}

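/* Schedule restore work for every known process; returns an error if any of
 * the work items could not be queued.
 */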
int kfd_resume_all_processes(void)
{
	struct kfd_process *p;
	unsigned int temp;
	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (!schedule_delayed_work(&p->restore_work, 0)) {
			pr_err("Restore process %d failed during resume\n",
			       p->pasid);
			ret = -EFAULT;
		}
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
	return ret;
}

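/* Allocate the per-device CWSR (compute wave save/restore) area and map it
 * into the user process' address space.
 */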
int kfd_reserved_mem_mmap(struct kfd_process *process,
			  struct vm_area_struct *vma)
{
	struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
	struct kfd_process_device *pdd;
	struct qcm_process_device *qpd;

	if (!dev)
		return -EINVAL;
	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;
	qpd = &pdd->qpd;

	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
	/* Mapping pages to user process */
	return remap_pfn_range(vma, vma->vm_start,
			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}

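/* Invalidate the GPU TLB entries of this process: per VMID when HWS is
 * disabled, per PASID otherwise.
 */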
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;
	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		/* Nothing to flush until a VMID is assigned, which
		 * only happens when the first queue is created.
		 */
		if (pdd->qpd.vmid)
			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
	} else {
		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
	}
}

#if defined(CONFIG_DEBUG_FS)

int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
{
	struct kfd_process *p;
	unsigned int temp;
	int r = 0;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		seq_printf(m, "Process %d PASID %d:\n",
			   p->lead_thread->tgid, p->pasid);

		mutex_lock(&p->mutex);
		r = pqm_debugfs_mqds(m, &p->pqm);
		mutex_unlock(&p->mutex);

		if (r)
			break;
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return r;
}

#endif