/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_opcodes.h"

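/* Advance the runlist IB write pointer (kept in dwords) by increment_bytes
 * and warn if the new position would run past the end of the IB buffer.
 */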
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
                                unsigned int buffer_size_bytes)
{
        unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

        WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
             "Runlist IB overflow");
        *wptr = temp;
}

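/* Build a PM4 type-3 packet header for the given opcode. The PM4 count
 * field holds the packet size in dwords minus 2, which is what
 * packet_size / 4 - 2 computes below.
 */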
static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
{
        union PM4_MES_TYPE_3_HEADER header;

        header.u32All = 0;
        header.opcode = opcode;
        header.count = packet_size / 4 - 2;
        header.type = PM4_TYPE_3;

        return header.u32All;
}

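/* Compute the runlist IB size in bytes: one MAP_PROCESS packet per process
 * plus one MAP_QUEUES packet per queue. Also report over-subscription, i.e.
 * more processes than can run in one scheduling quantum or more compute
 * queues than available queue slots; an over-subscribed runlist needs extra
 * room for a chaining RUN_LIST packet at the end.
 */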
static void pm_calc_rlib_size(struct packet_manager *pm,
                                unsigned int *rlib_size,
                                bool *over_subscription)
{
        unsigned int process_count, queue_count, compute_queue_count;
        unsigned int map_queue_size;
        unsigned int max_proc_per_quantum = 1;
        struct kfd_dev *dev = pm->dqm->dev;

        process_count = pm->dqm->processes_count;
        queue_count = pm->dqm->queue_count;
        compute_queue_count = queue_count - pm->dqm->sdma_queue_count;

        /* check if there is over subscription
         * Note: the arbitration between the number of VMIDs and
         * hws_max_conc_proc has been done in
         * kgd2kfd_device_init().
         */
        *over_subscription = false;

        if (dev->max_proc_per_quantum > 1)
                max_proc_per_quantum = dev->max_proc_per_quantum;

        if ((process_count > max_proc_per_quantum) ||
            compute_queue_count > get_queues_num(pm->dqm)) {
                *over_subscription = true;
                pr_debug("Over subscribed runlist\n");
        }

        map_queue_size = sizeof(struct pm4_mes_map_queues);
        /* calculate run list ib allocation size */
        *rlib_size = process_count * sizeof(struct pm4_mes_map_process) +
                     queue_count * map_queue_size;

        /*
         * Increase the allocation size in case we need a chained run list
         * when over subscription
         */
        if (*over_subscription)
                *rlib_size += sizeof(struct pm4_mes_runlist);

        pr_debug("runlist ib size %d\n", *rlib_size);
}

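/* Allocate and zero the runlist IB from the device GTT sub-allocator and
 * return its CPU and GPU addresses along with the computed size and
 * over-subscription status.
 */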
static int pm_allocate_runlist_ib(struct packet_manager *pm,
                                unsigned int **rl_buffer,
                                uint64_t *rl_gpu_buffer,
                                unsigned int *rl_buffer_size,
                                bool *is_over_subscription)
{
        int retval;

        if (WARN_ON(pm->allocated))
                return -EINVAL;

        pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);

        retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
                                        &pm->ib_buffer_obj);

        if (retval) {
                pr_err("Failed to allocate runlist IB\n");
                return retval;
        }

        *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
        *rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

        memset(*rl_buffer, 0, *rl_buffer_size);
        pm->allocated = true;
        return retval;
}

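/* Write a RUN_LIST packet into buffer, pointing the HW scheduler at the
 * runlist IB at GPU address 'ib' (ib_size_in_dwords long). When 'chain' is
 * set, the packet chains to another runlist instead of ending the list.
 */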
static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
                        uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
        struct pm4_mes_runlist *packet;
        int concurrent_proc_cnt = 0;
        struct kfd_dev *kfd = pm->dqm->dev;

        if (WARN_ON(!ib))
                return -EFAULT;

        /* Determine the number of processes to map together to HW:
         * it cannot exceed the number of VMIDs available to the
         * scheduler, and is the smaller of the number of processes in
         * the runlist and the kfd module parameter hws_max_conc_proc.
         * Note: the arbitration between the number of VMIDs and
         * hws_max_conc_proc has been done in kgd2kfd_device_init().
         */
        concurrent_proc_cnt = min(pm->dqm->processes_count,
                        kfd->max_proc_per_quantum);

        packet = (struct pm4_mes_runlist *)buffer;

        memset(buffer, 0, sizeof(struct pm4_mes_runlist));
        packet->header.u32All = build_pm4_header(IT_RUN_LIST,
                                                sizeof(struct pm4_mes_runlist));

        packet->bitfields4.ib_size = ib_size_in_dwords;
        packet->bitfields4.chain = chain ? 1 : 0;
        packet->bitfields4.offload_polling = 0;
        packet->bitfields4.valid = 1;
        packet->bitfields4.process_cnt = concurrent_proc_cnt;
        packet->ordinal2 = lower_32_bits(ib);
        packet->bitfields3.ib_base_hi = upper_32_bits(ib);

        return 0;
}

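/* Write a MAP_PROCESS packet into buffer, describing one process to the HW
 * scheduler: PASID, page table base, shader memory apertures, GDS/GWS/OAC
 * allocations and the number of queues that follow.
 */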
static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
                                struct qcm_process_device *qpd)
{
        struct pm4_mes_map_process *packet;

        packet = (struct pm4_mes_map_process *)buffer;

        memset(buffer, 0, sizeof(struct pm4_mes_map_process));

        packet->header.u32All = build_pm4_header(IT_MAP_PROCESS,
                                        sizeof(struct pm4_mes_map_process));
        packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
        packet->bitfields2.process_quantum = 1;
        packet->bitfields2.pasid = qpd->pqm->process->pasid;
        packet->bitfields3.page_table_base = qpd->page_table_base;
        packet->bitfields10.gds_size = qpd->gds_size;
        packet->bitfields10.num_gws = qpd->num_gws;
        packet->bitfields10.num_oac = qpd->num_oac;
        packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;

        packet->sh_mem_config = qpd->sh_mem_config;
        packet->sh_mem_bases = qpd->sh_mem_bases;
        packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
        packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;

        packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;

        packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
        packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);

        return 0;
}

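/* Write a MAP_QUEUES packet into buffer for a single compute, DIQ or SDMA
 * queue, filling in the doorbell offset, MQD address and write pointer
 * address. The static-queue attribute only applies to compute queues.
 */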
static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
                struct queue *q, bool is_static)
{
        struct pm4_mes_map_queues *packet;
        bool use_static = is_static;

        packet = (struct pm4_mes_map_queues *)buffer;
        memset(buffer, 0, sizeof(struct pm4_mes_map_queues));

        packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
                                                sizeof(struct pm4_mes_map_queues));
        packet->bitfields2.alloc_format =
                alloc_format__mes_map_queues__one_per_pipe_vi;
        packet->bitfields2.num_queues = 1;
        packet->bitfields2.queue_sel =
                queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;

        packet->bitfields2.engine_sel =
                engine_sel__mes_map_queues__compute_vi;
        packet->bitfields2.queue_type =
                queue_type__mes_map_queues__normal_compute_vi;

        switch (q->properties.type) {
        case KFD_QUEUE_TYPE_COMPUTE:
                if (use_static)
                        packet->bitfields2.queue_type =
                queue_type__mes_map_queues__normal_latency_static_queue_vi;
                break;
        case KFD_QUEUE_TYPE_DIQ:
                packet->bitfields2.queue_type =
                        queue_type__mes_map_queues__debug_interface_queue_vi;
                break;
        case KFD_QUEUE_TYPE_SDMA:
                packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
                                engine_sel__mes_map_queues__sdma0_vi;
                use_static = false; /* no static queues under SDMA */
                break;
        default:
                WARN(1, "queue type %d", q->properties.type);
                return -EINVAL;
        }
        packet->bitfields3.doorbell_offset =
                        q->properties.doorbell_off;

        packet->mqd_addr_lo =
                        lower_32_bits(q->gart_mqd_addr);

        packet->mqd_addr_hi =
                        upper_32_bits(q->gart_mqd_addr);

        packet->wptr_addr_lo =
                        lower_32_bits((uint64_t)q->properties.write_ptr);

        packet->wptr_addr_hi =
                        upper_32_bits((uint64_t)q->properties.write_ptr);

        return 0;
}

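/* Build the runlist IB: a MAP_PROCESS packet for each process followed by
 * MAP_QUEUES packets for its active kernel and user queues, and, when the
 * runlist is over-subscribed, a chained RUN_LIST packet that points back to
 * the start of the same IB.
 */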
static int pm_create_runlist_ib(struct packet_manager *pm,
                                struct list_head *queues,
                                uint64_t *rl_gpu_addr,
                                size_t *rl_size_bytes)
{
        unsigned int alloc_size_bytes;
        unsigned int *rl_buffer, rl_wptr, i;
        int retval, processes_mapped;
        struct device_process_node *cur;
        struct qcm_process_device *qpd;
        struct queue *q;
        struct kernel_queue *kq;
        bool is_over_subscription;

        rl_wptr = retval = processes_mapped = 0;

        retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
                                &alloc_size_bytes, &is_over_subscription);
        if (retval)
                return retval;

        *rl_size_bytes = alloc_size_bytes;
        pm->ib_size_bytes = alloc_size_bytes;

        pr_debug("Building runlist ib process count: %d queues count %d\n",
                pm->dqm->processes_count, pm->dqm->queue_count);

        /* build the run list ib packet */
        list_for_each_entry(cur, queues, list) {
                qpd = cur->qpd;
                /* build map process packet */
                if (processes_mapped >= pm->dqm->processes_count) {
                        pr_debug("Not enough space left in runlist IB\n");
                        pm_release_ib(pm);
                        return -ENOMEM;
                }

                retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
                if (retval)
                        return retval;

                processes_mapped++;
                inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process),
                                alloc_size_bytes);

                list_for_each_entry(kq, &qpd->priv_queue_list, list) {
                        if (!kq->queue->properties.is_active)
                                continue;

                        pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
                                kq->queue->queue, qpd->is_debug);

                        retval = pm_create_map_queue(pm,
                                                &rl_buffer[rl_wptr],
                                                kq->queue,
                                                qpd->is_debug);
                        if (retval)
                                return retval;

                        inc_wptr(&rl_wptr,
                                sizeof(struct pm4_mes_map_queues),
                                alloc_size_bytes);
                }

                list_for_each_entry(q, &qpd->queues_list, list) {
                        if (!q->properties.is_active)
                                continue;

                        pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
                                q->queue, qpd->is_debug);

                        retval = pm_create_map_queue(pm,
                                                &rl_buffer[rl_wptr],
                                                q,
                                                qpd->is_debug);

                        if (retval)
                                return retval;

                        inc_wptr(&rl_wptr,
                                sizeof(struct pm4_mes_map_queues),
                                alloc_size_bytes);
                }
        }

        pr_debug("Finished map process and queues to runlist\n");

        if (is_over_subscription)
                retval = pm_create_runlist(pm, &rl_buffer[rl_wptr],
                                        *rl_gpu_addr,
                                        alloc_size_bytes / sizeof(uint32_t),
                                        true);

        for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
                pr_debug("0x%2X ", rl_buffer[i]);
        pr_debug("\n");

        return retval;
}

/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
 *     of this packet
 *     @gpu_addr - GPU virtual address the packet writes its data to
 *     @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
 *     Return - length of the packet in dwords
 */
uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
{
        struct pm4_mec_release_mem *packet;

        WARN_ON(!buffer);

        packet = (struct pm4_mec_release_mem *)buffer;
        memset(buffer, 0, sizeof(*packet));

        packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
                                                 sizeof(*packet));

        packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
        packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
        packet->bitfields2.tcl1_action_ena = 1;
        packet->bitfields2.tc_action_ena = 1;
        packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
        packet->bitfields2.atc = 0;

        packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
        packet->bitfields3.int_sel =
                int_sel___release_mem__send_interrupt_after_write_confirm;

        packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
        packet->address_hi = upper_32_bits(gpu_addr);

        packet->data_lo = 0;

        return sizeof(*packet) / sizeof(unsigned int);
}

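/* Initialize the packet manager and create the HIQ kernel queue used to
 * submit scheduler packets.
 */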
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
        pm->dqm = dqm;
        mutex_init(&pm->lock);
        pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
        if (!pm->priv_queue) {
                mutex_destroy(&pm->lock);
                return -ENOMEM;
        }
        pm->allocated = false;

        return 0;
}

void pm_uninit(struct packet_manager *pm)
{
        mutex_destroy(&pm->lock);
        kernel_queue_uninit(pm->priv_queue);
}

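/* Send a SET_RESOURCES packet on the HIQ, handing the HW scheduler its VMID
 * mask, HQD queue mask, unmap latency and GDS/GWS/OAC resources.
 */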
int pm_send_set_resources(struct packet_manager *pm,
                                struct scheduling_resources *res)
{
        struct pm4_mes_set_resources *packet;
        int retval = 0;

        mutex_lock(&pm->lock);
        pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
                                        sizeof(*packet) / sizeof(uint32_t),
                                        (unsigned int **)&packet);
        if (!packet) {
                pr_err("Failed to allocate buffer on kernel queue\n");
                retval = -ENOMEM;
                goto out;
        }

        memset(packet, 0, sizeof(struct pm4_mes_set_resources));
        packet->header.u32All = build_pm4_header(IT_SET_RESOURCES,
                                        sizeof(struct pm4_mes_set_resources));

        packet->bitfields2.queue_type =
                        queue_type__mes_set_resources__hsa_interface_queue_hiq;
        packet->bitfields2.vmid_mask = res->vmid_mask;
        packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
        packet->bitfields7.oac_mask = res->oac_mask;
        packet->bitfields8.gds_heap_base = res->gds_heap_base;
        packet->bitfields8.gds_heap_size = res->gds_heap_size;

        packet->gws_mask_lo = lower_32_bits(res->gws_mask);
        packet->gws_mask_hi = upper_32_bits(res->gws_mask);

        packet->queue_mask_lo = lower_32_bits(res->queue_mask);
        packet->queue_mask_hi = upper_32_bits(res->queue_mask);

        pm->priv_queue->ops.submit_packet(pm->priv_queue);

out:
        mutex_unlock(&pm->lock);

        return retval;
}

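/* Build a runlist IB for the given queue list and submit a RUN_LIST packet
 * on the HIQ that points the HW scheduler at it.
 */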
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
        uint64_t rl_gpu_ib_addr;
        uint32_t *rl_buffer;
        size_t rl_ib_size, packet_size_dwords;
        int retval;

        retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
                                        &rl_ib_size);
        if (retval)
                goto fail_create_runlist_ib;

        pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

        packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t);
        mutex_lock(&pm->lock);

        retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
                                        packet_size_dwords, &rl_buffer);
        if (retval)
                goto fail_acquire_packet_buffer;

        retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
                                        rl_ib_size / sizeof(uint32_t), false);
        if (retval)
                goto fail_create_runlist;

        pm->priv_queue->ops.submit_packet(pm->priv_queue);

        mutex_unlock(&pm->lock);

        return retval;

fail_create_runlist:
        pm->priv_queue->ops.rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
        mutex_unlock(&pm->lock);
fail_create_runlist_ib:
        pm_release_ib(pm);
        return retval;
}

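/* Send a QUERY_STATUS packet on the HIQ that has the HW scheduler write
 * fence_value to fence_address and raise a completion interrupt.
 */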
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
                        uint32_t fence_value)
{
        int retval;
        struct pm4_mes_query_status *packet;

        if (WARN_ON(!fence_address))
                return -EFAULT;

        mutex_lock(&pm->lock);
        retval = pm->priv_queue->ops.acquire_packet_buffer(
                        pm->priv_queue,
                        sizeof(struct pm4_mes_query_status) / sizeof(uint32_t),
                        (unsigned int **)&packet);
        if (retval)
                goto fail_acquire_packet_buffer;

        packet->header.u32All = build_pm4_header(IT_QUERY_STATUS,
                                        sizeof(struct pm4_mes_query_status));

        packet->bitfields2.context_id = 0;
        packet->bitfields2.interrupt_sel =
                        interrupt_sel__mes_query_status__completion_status;
        packet->bitfields2.command =
                        command__mes_query_status__fence_only_after_write_ack;

        packet->addr_hi = upper_32_bits((uint64_t)fence_address);
        packet->addr_lo = lower_32_bits((uint64_t)fence_address);
        packet->data_hi = upper_32_bits((uint64_t)fence_value);
        packet->data_lo = lower_32_bits((uint64_t)fence_value);

        pm->priv_queue->ops.submit_packet(pm->priv_queue);

fail_acquire_packet_buffer:
        mutex_unlock(&pm->lock);
        return retval;
}

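/* Send an UNMAP_QUEUES packet on the HIQ to preempt (or reset) queues of the
 * given engine type, selected either individually by doorbell offset, by
 * PASID, or all (optionally excluding static) queues.
 */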
int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
                        enum kfd_unmap_queues_filter filter,
                        uint32_t filter_param, bool reset,
                        unsigned int sdma_engine)
{
        int retval;
        uint32_t *buffer;
        struct pm4_mes_unmap_queues *packet;

        mutex_lock(&pm->lock);
        retval = pm->priv_queue->ops.acquire_packet_buffer(
                        pm->priv_queue,
                        sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t),
                        &buffer);
        if (retval)
                goto err_acquire_packet_buffer;

        packet = (struct pm4_mes_unmap_queues *)buffer;
        memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
        pr_debug("static_queue: unmapping queues: filter is %d, reset is %d, type is %d\n",
                filter, reset, type);
        packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES,
                                        sizeof(struct pm4_mes_unmap_queues));
        switch (type) {
        case KFD_QUEUE_TYPE_COMPUTE:
        case KFD_QUEUE_TYPE_DIQ:
                packet->bitfields2.engine_sel =
                        engine_sel__mes_unmap_queues__compute;
                break;
        case KFD_QUEUE_TYPE_SDMA:
                packet->bitfields2.engine_sel =
                        engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
                break;
        default:
                WARN(1, "queue type %d", type);
                retval = -EINVAL;
                goto err_invalid;
        }

        if (reset)
                packet->bitfields2.action =
                                action__mes_unmap_queues__reset_queues;
        else
                packet->bitfields2.action =
                                action__mes_unmap_queues__preempt_queues;

        switch (filter) {
        case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
                packet->bitfields2.queue_sel =
                                queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
                packet->bitfields2.num_queues = 1;
                packet->bitfields3b.doorbell_offset0 = filter_param;
                break;
        case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
                packet->bitfields2.queue_sel =
                                queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
                packet->bitfields3a.pasid = filter_param;
                break;
        case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
                packet->bitfields2.queue_sel =
                                queue_sel__mes_unmap_queues__unmap_all_queues;
                break;
        case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
                /* in this case, we do not preempt static queues */
                packet->bitfields2.queue_sel =
                                queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
                break;
        default:
                WARN(1, "filter %d", filter);
                retval = -EINVAL;
                goto err_invalid;
        }

        pm->priv_queue->ops.submit_packet(pm->priv_queue);

        mutex_unlock(&pm->lock);
        return 0;

err_invalid:
        pm->priv_queue->ops.rollback_packet(pm->priv_queue);
err_acquire_packet_buffer:
        mutex_unlock(&pm->lock);
        return retval;
}

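/* Free the currently allocated runlist IB, if any. */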
void pm_release_ib(struct packet_manager *pm)
{
        mutex_lock(&pm->lock);
        if (pm->allocated) {
                kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
                pm->allocated = false;
        }
        mutex_unlock(&pm->lock);
}

#if defined(CONFIG_DEBUG_FS)

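/* Dump the active runlist IB, if any, as a hex dump for debugfs. */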
int pm_debugfs_runlist(struct seq_file *m, void *data)
{
        struct packet_manager *pm = data;

        mutex_lock(&pm->lock);

        if (!pm->allocated) {
                seq_puts(m, "  No active runlist\n");
                goto out;
        }

        seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
                     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);

out:
        mutex_unlock(&pm->lock);
        return 0;
}

#endif