757cf4e9de330e21169d5b6b92efd8afc759800f
[muen/linux.git] / drivers / net / wireless / intel / iwlwifi / pcie / tx.c
1 /******************************************************************************
2  *
3  * This file is provided under a dual BSD/GPLv2 license.  When using or
4  * redistributing this file, you may do so under either license.
5  *
6  * GPL LICENSE SUMMARY
7  *
8  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
9  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
10  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
11  * Copyright(c) 2018 - 2020 Intel Corporation
12  *
13  * This program is free software; you can redistribute it and/or modify it
14  * under the terms of version 2 of the GNU General Public License as
15  * published by the Free Software Foundation.
16  *
17  * This program is distributed in the hope that it will be useful, but WITHOUT
18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
20  * more details.
21  *
22  * The full GNU General Public License is included in this distribution in the
23  * file called COPYING.
24  *
25  * Contact Information:
26  *  Intel Linux Wireless <linuxwifi@intel.com>
27  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
28  *
29  * BSD LICENSE
30  *
31  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
32  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
33  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
34  * Copyright(c) 2018 - 2020 Intel Corporation
35  * All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  *
41  *  * Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  *  * Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in
45  *    the documentation and/or other materials provided with the
46  *    distribution.
47  *  * Neither the name Intel Corporation nor the names of its
48  *    contributors may be used to endorse or promote products derived
49  *    from this software without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
52  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
53  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
54  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
55  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
56  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
57  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
61  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62  *
63  *****************************************************************************/
64 #include <linux/etherdevice.h>
65 #include <linux/ieee80211.h>
66 #include <linux/slab.h>
67 #include <linux/sched.h>
68 #include <net/ip6_checksum.h>
69 #include <net/tso.h>
70
71 #include "iwl-debug.h"
72 #include "iwl-csr.h"
73 #include "iwl-prph.h"
74 #include "iwl-io.h"
75 #include "iwl-scd.h"
76 #include "iwl-op-mode.h"
77 #include "internal.h"
78 #include "fw/api/tx.h"
79
80 #define IWL_TX_CRC_SIZE 4
81 #define IWL_TX_DELIMITER_SIZE 4
82
83 /*************** DMA-QUEUE-GENERAL-FUNCTIONS  *****
84  * DMA services
85  *
86  * Theory of operation
87  *
88  * A Tx or Rx queue resides in host DRAM, and is comprised of a circular buffer
89  * of buffer descriptors, each of which points to one or more data buffers for
90  * the device to read from or fill.  Driver and device exchange status of each
91  * queue via "read" and "write" pointers.  The driver keeps a minimum of 2 empty
92  * entries in each circular buffer, to protect against confusing the empty and
93  * full queue states.
94  *
95  * The device reads or writes the data in the queues via the device's several
96  * DMA/FIFO channels.  Each queue is mapped to a single DMA channel.
97  *
98  * For a Tx queue, there are low mark and high mark limits. If, after queuing
99  * a packet for Tx, the free space becomes < low mark, the Tx queue is stopped.
100  * When reclaiming packets (on the 'tx done' IRQ), if the free space becomes
101  * > high mark, the Tx queue is resumed.
102  *
103  ***************************************************/
104
105 int iwl_queue_space(struct iwl_trans *trans, const struct iwl_txq *q)
106 {
107         unsigned int max;
108         unsigned int used;
109
110         /*
111          * To avoid ambiguity between empty and completely full queues, there
112          * should always be less than max_tfd_queue_size elements in the queue.
113          * If q->n_window is smaller than max_tfd_queue_size, there is no need
114          * to reserve any queue entries for this purpose.
115          */
116         if (q->n_window < trans->trans_cfg->base_params->max_tfd_queue_size)
117                 max = q->n_window;
118         else
119                 max = trans->trans_cfg->base_params->max_tfd_queue_size - 1;
120
121         /*
122          * max_tfd_queue_size is a power of 2, so the following is equivalent to
123          * modulo by max_tfd_queue_size and is well defined.
124          */
125         used = (q->write_ptr - q->read_ptr) &
126                 (trans->trans_cfg->base_params->max_tfd_queue_size - 1);
127
128         if (WARN_ON(used > max))
129                 return 0;
130
131         return max - used;
132 }
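/*
 * Illustrative worked example (annotation, not part of the driver; values
 * chosen only for illustration): assume max_tfd_queue_size == 256 and
 * n_window == 256, so max == 255.  With write_ptr == 5 and read_ptr == 250,
 * used = (5 - 250) & 255 = 11, and iwl_queue_space() returns
 * 255 - 11 = 244 free entries.  The "& (size - 1)" trick is well defined
 * only because max_tfd_queue_size is guaranteed to be a power of two.
 */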
133
134 /*
135  * iwl_queue_init - Initialize queue's high/low-water and read/write indexes
136  */
137 static int iwl_queue_init(struct iwl_txq *q, int slots_num)
138 {
139         q->n_window = slots_num;
140
141         /* slots_num must be a power of two, otherwise
142          * iwl_pcie_get_cmd_index is broken. */
143         if (WARN_ON(!is_power_of_2(slots_num)))
144                 return -EINVAL;
145
146         q->low_mark = q->n_window / 4;
147         if (q->low_mark < 4)
148                 q->low_mark = 4;
149
150         q->high_mark = q->n_window / 8;
151         if (q->high_mark < 2)
152                 q->high_mark = 2;
153
154         q->write_ptr = 0;
155         q->read_ptr = 0;
156
157         return 0;
158 }
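/*
 * Illustrative worked example (annotation, not part of the driver):
 * for slots_num == 64, low_mark = 64 / 4 = 16 and high_mark = 64 / 8 = 8;
 * for a small queue with slots_num == 8, both clamps apply and the marks
 * become low_mark = 4 and high_mark = 2.
 */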
159
160 int iwl_pcie_alloc_dma_ptr(struct iwl_trans *trans,
161                            struct iwl_dma_ptr *ptr, size_t size)
162 {
163         if (WARN_ON(ptr->addr))
164                 return -EINVAL;
165
166         ptr->addr = dma_alloc_coherent(trans->dev, size,
167                                        &ptr->dma, GFP_KERNEL);
168         if (!ptr->addr)
169                 return -ENOMEM;
170         ptr->size = size;
171         return 0;
172 }
173
174 void iwl_pcie_free_dma_ptr(struct iwl_trans *trans, struct iwl_dma_ptr *ptr)
175 {
176         if (unlikely(!ptr->addr))
177                 return;
178
179         dma_free_coherent(trans->dev, ptr->size, ptr->addr, ptr->dma);
180         memset(ptr, 0, sizeof(*ptr));
181 }
182
183 static void iwl_pcie_txq_stuck_timer(struct timer_list *t)
184 {
185         struct iwl_txq *txq = from_timer(txq, t, stuck_timer);
186         struct iwl_trans *trans = txq->trans;
187
188         spin_lock(&txq->lock);
189         /* check if triggered erroneously */
190         if (txq->read_ptr == txq->write_ptr) {
191                 spin_unlock(&txq->lock);
192                 return;
193         }
194         spin_unlock(&txq->lock);
195
196         iwl_trans_pcie_log_scd_error(trans, txq);
197
198         iwl_force_nmi(trans);
199 }
200
201 /*
202  * iwl_pcie_txq_update_byte_cnt_tbl - Set up entry in Tx byte-count array
203  */
204 static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
205                                              struct iwl_txq *txq, u16 byte_cnt,
206                                              int num_tbs)
207 {
208         struct iwlagn_scd_bc_tbl *scd_bc_tbl;
209         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
210         int write_ptr = txq->write_ptr;
211         int txq_id = txq->id;
212         u8 sec_ctl = 0;
213         u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE;
214         __le16 bc_ent;
215         struct iwl_device_tx_cmd *dev_cmd = txq->entries[txq->write_ptr].cmd;
216         struct iwl_tx_cmd *tx_cmd = (void *)dev_cmd->payload;
217         u8 sta_id = tx_cmd->sta_id;
218
219         scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
220
221         sec_ctl = tx_cmd->sec_ctl;
222
223         switch (sec_ctl & TX_CMD_SEC_MSK) {
224         case TX_CMD_SEC_CCM:
225                 len += IEEE80211_CCMP_MIC_LEN;
226                 break;
227         case TX_CMD_SEC_TKIP:
228                 len += IEEE80211_TKIP_ICV_LEN;
229                 break;
230         case TX_CMD_SEC_WEP:
231                 len += IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN;
232                 break;
233         }
234         if (trans_pcie->bc_table_dword)
235                 len = DIV_ROUND_UP(len, 4);
236
237         if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX))
238                 return;
239
240         bc_ent = cpu_to_le16(len | (sta_id << 12));
241
242         scd_bc_tbl[txq_id].tfd_offset[write_ptr] = bc_ent;
243
244         if (write_ptr < TFD_QUEUE_SIZE_BC_DUP)
245                 scd_bc_tbl[txq_id].
246                         tfd_offset[TFD_QUEUE_SIZE_MAX + write_ptr] = bc_ent;
247 }
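/*
 * Illustrative worked example (annotation, not part of the driver; values
 * chosen only for illustration): for byte_cnt == 200 on a CCMP-protected
 * frame with sta_id == 3,
 * len = 200 + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE +
 *       IEEE80211_CCMP_MIC_LEN = 200 + 4 + 4 + 8 = 216.
 * If bc_table_dword is set, len becomes DIV_ROUND_UP(216, 4) = 54, and the
 * byte-count table entry is bc_ent = cpu_to_le16(54 | (3 << 12)), i.e.
 * cpu_to_le16(0x3036).
 */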
248
249 static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
250                                             struct iwl_txq *txq)
251 {
252         struct iwl_trans_pcie *trans_pcie =
253                 IWL_TRANS_GET_PCIE_TRANS(trans);
254         struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
255         int txq_id = txq->id;
256         int read_ptr = txq->read_ptr;
257         u8 sta_id = 0;
258         __le16 bc_ent;
259         struct iwl_device_tx_cmd *dev_cmd = txq->entries[read_ptr].cmd;
260         struct iwl_tx_cmd *tx_cmd = (void *)dev_cmd->payload;
261
262         WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
263
264         if (txq_id != trans_pcie->cmd_queue)
265                 sta_id = tx_cmd->sta_id;
266
267         bc_ent = cpu_to_le16(1 | (sta_id << 12));
268
269         scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent;
270
271         if (read_ptr < TFD_QUEUE_SIZE_BC_DUP)
272                 scd_bc_tbl[txq_id].
273                         tfd_offset[TFD_QUEUE_SIZE_MAX + read_ptr] = bc_ent;
274 }
275
276 /*
277  * iwl_pcie_txq_inc_wr_ptr - Send new write index to hardware
278  */
279 static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
280                                     struct iwl_txq *txq)
281 {
282         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
283         u32 reg = 0;
284         int txq_id = txq->id;
285
286         lockdep_assert_held(&txq->lock);
287
288         /*
289          * explicitly wake up the NIC if:
290          * 1. shadow registers aren't enabled
291          * 2. this is not the cmd queue (for commands, the NIC is woken up outside this function)
292          * 3. there is a chance that the NIC is asleep
293          */
294         if (!trans->trans_cfg->base_params->shadow_reg_enable &&
295             txq_id != trans_pcie->cmd_queue &&
296             test_bit(STATUS_TPOWER_PMI, &trans->status)) {
297                 /*
298                  * wake up nic if it's powered down ...
299                  * uCode will wake up, and interrupt us again, so next
300                  * time we'll skip this part.
301                  */
302                 reg = iwl_read32(trans, CSR_UCODE_DRV_GP1);
303
304                 if (reg & CSR_UCODE_DRV_GP1_BIT_MAC_SLEEP) {
305                         IWL_DEBUG_INFO(trans, "Tx queue %d requesting wakeup, GP1 = 0x%x\n",
306                                        txq_id, reg);
307                         iwl_set_bit(trans, CSR_GP_CNTRL,
308                                     CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
309                         txq->need_update = true;
310                         return;
311                 }
312         }
313
314         /*
315          * if not in power-save mode, uCode will never sleep when we're
316          * trying to tx (during RFKILL, we're not trying to tx).
317          */
318         IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr);
319         if (!txq->block)
320                 iwl_write32(trans, HBUS_TARG_WRPTR,
321                             txq->write_ptr | (txq_id << 8));
322 }
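/*
 * Illustrative worked example (annotation, not part of the driver): with
 * the legacy 256-entry queue, the write pointer fits in the low byte, so
 * for txq_id == 4 and write_ptr == 0x25 the value written to
 * HBUS_TARG_WRPTR is 0x25 | (4 << 8) = 0x0425 - the queue index in bits
 * 8 and up, the new write pointer in bits 0..7.
 */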
323
324 void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
325 {
326         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
327         int i;
328
329         for (i = 0; i < trans->trans_cfg->base_params->num_of_queues; i++) {
330                 struct iwl_txq *txq = trans_pcie->txq[i];
331
332                 if (!test_bit(i, trans_pcie->queue_used))
333                         continue;
334
335                 spin_lock_bh(&txq->lock);
336                 if (txq->need_update) {
337                         iwl_pcie_txq_inc_wr_ptr(trans, txq);
338                         txq->need_update = false;
339                 }
340                 spin_unlock_bh(&txq->lock);
341         }
342 }
343
344 static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans,
345                                                   void *_tfd, u8 idx)
346 {
347
348         if (trans->trans_cfg->use_tfh) {
349                 struct iwl_tfh_tfd *tfd = _tfd;
350                 struct iwl_tfh_tb *tb = &tfd->tbs[idx];
351
352                 return (dma_addr_t)(le64_to_cpu(tb->addr));
353         } else {
354                 struct iwl_tfd *tfd = _tfd;
355                 struct iwl_tfd_tb *tb = &tfd->tbs[idx];
356                 dma_addr_t addr = get_unaligned_le32(&tb->lo);
357                 dma_addr_t hi_len;
358
359                 if (sizeof(dma_addr_t) <= sizeof(u32))
360                         return addr;
361
362                 hi_len = le16_to_cpu(tb->hi_n_len) & 0xF;
363
364                 /*
365                  * shift by 16 twice to avoid warnings on 32-bit
366                  * (where this code never runs anyway due to the
367                  * if statement above)
368                  */
369                 return addr | ((hi_len << 16) << 16);
370         }
371 }
372
373 static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd,
374                                        u8 idx, dma_addr_t addr, u16 len)
375 {
376         struct iwl_tfd *tfd_fh = (void *)tfd;
377         struct iwl_tfd_tb *tb = &tfd_fh->tbs[idx];
378
379         u16 hi_n_len = len << 4;
380
381         put_unaligned_le32(addr, &tb->lo);
382         hi_n_len |= iwl_get_dma_hi_addr(addr);
383
384         tb->hi_n_len = cpu_to_le16(hi_n_len);
385
386         tfd_fh->num_tbs = idx + 1;
387 }
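/*
 * Illustrative worked example (annotation, not part of the driver; the
 * address and length are arbitrary illustration values): for a 36-bit DMA
 * address addr == 0x123456780 and len == 100, tb->lo receives the low
 * 32 bits (0x23456780) and hi_n_len = (100 << 4) | 0x1 = 0x641, i.e. the
 * upper 4 address bits in the low nibble and the length in the remaining
 * 12 bits.
 */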
388
389 static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *_tfd)
390 {
391         if (trans->trans_cfg->use_tfh) {
392                 struct iwl_tfh_tfd *tfd = _tfd;
393
394                 return le16_to_cpu(tfd->num_tbs) & 0x1f;
395         } else {
396                 struct iwl_tfd *tfd = _tfd;
397
398                 return tfd->num_tbs & 0x1f;
399         }
400 }
401
402 static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
403                                struct iwl_cmd_meta *meta,
404                                struct iwl_txq *txq, int index)
405 {
406         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
407         int i, num_tbs;
408         void *tfd = iwl_pcie_get_tfd(trans, txq, index);
409
410         /* Sanity check on number of chunks */
411         num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
412
413         if (num_tbs > trans_pcie->max_tbs) {
414                 IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
415                 /* @todo issue a fatal error, this is quite a serious situation */
416                 return;
417         }
418
419         /* first TB is never freed - it's the bidirectional DMA data */
420
421         for (i = 1; i < num_tbs; i++) {
422                 if (meta->tbs & BIT(i))
423                         dma_unmap_page(trans->dev,
424                                        iwl_pcie_tfd_tb_get_addr(trans, tfd, i),
425                                        iwl_pcie_tfd_tb_get_len(trans, tfd, i),
426                                        DMA_TO_DEVICE);
427                 else
428                         dma_unmap_single(trans->dev,
429                                          iwl_pcie_tfd_tb_get_addr(trans, tfd,
430                                                                   i),
431                                          iwl_pcie_tfd_tb_get_len(trans, tfd,
432                                                                  i),
433                                          DMA_TO_DEVICE);
434         }
435
436         meta->tbs = 0;
437
438         if (trans->trans_cfg->use_tfh) {
439                 struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
440
441                 tfd_fh->num_tbs = 0;
442         } else {
443                 struct iwl_tfd *tfd_fh = (void *)tfd;
444
445                 tfd_fh->num_tbs = 0;
446         }
447
448 }
449
450 /*
451  * iwl_pcie_txq_free_tfd - Free all chunks referenced by TFD [txq->read_ptr]
452  * @trans - transport private data
453  * @txq - tx queue
455  *
456  * Does NOT advance any TFD circular buffer read/write indexes
457  * Does NOT free the TFD itself (which is within circular buffer)
458  */
459 void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
460 {
461         /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
462          * idx is bounded by n_window
463          */
464         int rd_ptr = txq->read_ptr;
465         int idx = iwl_pcie_get_cmd_index(txq, rd_ptr);
466
467         lockdep_assert_held(&txq->lock);
468
469         /* We have only txq->n_window txq->entries, but we use
470          * TFD_QUEUE_SIZE_MAX tfds
471          */
472         iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr);
473
474         /* free SKB */
475         if (txq->entries) {
476                 struct sk_buff *skb;
477
478                 skb = txq->entries[idx].skb;
479
480                 /* Can be called from irqs-disabled context
481                  * If skb is not NULL, it means that the whole queue is being
482                  * freed and that the queue is not empty - free the skb
483                  */
484                 if (skb) {
485                         iwl_op_mode_free_skb(trans->op_mode, skb);
486                         txq->entries[idx].skb = NULL;
487                 }
488         }
489 }
490
491 static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
492                                   dma_addr_t addr, u16 len, bool reset)
493 {
494         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
495         void *tfd;
496         u32 num_tbs;
497
498         tfd = txq->tfds + trans_pcie->tfd_size * txq->write_ptr;
499
500         if (reset)
501                 memset(tfd, 0, trans_pcie->tfd_size);
502
503         num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
504
505         /* Each TFD can point to a maximum of max_tbs Tx buffers */
506         if (num_tbs >= trans_pcie->max_tbs) {
507                 IWL_ERR(trans, "Error can not send more than %d chunks\n",
508                         trans_pcie->max_tbs);
509                 return -EINVAL;
510         }
511
512         if (WARN(addr & ~IWL_TX_DMA_MASK,
513                  "Unaligned address = %llx\n", (unsigned long long)addr))
514                 return -EINVAL;
515
516         iwl_pcie_tfd_set_tb(trans, tfd, num_tbs, addr, len);
517
518         return num_tbs;
519 }
520
521 int iwl_pcie_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq,
522                        int slots_num, bool cmd_queue)
523 {
524         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
525         size_t tfd_sz = trans_pcie->tfd_size *
526                 trans->trans_cfg->base_params->max_tfd_queue_size;
527         size_t tb0_buf_sz;
528         int i;
529
530         if (WARN_ON(txq->entries || txq->tfds))
531                 return -EINVAL;
532
533         if (trans->trans_cfg->use_tfh)
534                 tfd_sz = trans_pcie->tfd_size * slots_num;
535
536         timer_setup(&txq->stuck_timer, iwl_pcie_txq_stuck_timer, 0);
537         txq->trans = trans;
538
539         txq->n_window = slots_num;
540
541         txq->entries = kcalloc(slots_num,
542                                sizeof(struct iwl_pcie_txq_entry),
543                                GFP_KERNEL);
544
545         if (!txq->entries)
546                 goto error;
547
548         if (cmd_queue)
549                 for (i = 0; i < slots_num; i++) {
550                         txq->entries[i].cmd =
551                                 kmalloc(sizeof(struct iwl_device_cmd),
552                                         GFP_KERNEL);
553                         if (!txq->entries[i].cmd)
554                                 goto error;
555                 }
556
557         /* Circular buffer of transmit frame descriptors (TFDs),
558          * shared with device */
559         txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz,
560                                        &txq->dma_addr, GFP_KERNEL);
561         if (!txq->tfds)
562                 goto error;
563
564         BUILD_BUG_ON(IWL_FIRST_TB_SIZE_ALIGN != sizeof(*txq->first_tb_bufs));
565
566         tb0_buf_sz = sizeof(*txq->first_tb_bufs) * slots_num;
567
568         txq->first_tb_bufs = dma_alloc_coherent(trans->dev, tb0_buf_sz,
569                                               &txq->first_tb_dma,
570                                               GFP_KERNEL);
571         if (!txq->first_tb_bufs)
572                 goto err_free_tfds;
573
574         return 0;
575 err_free_tfds:
576         dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr);
577 error:
578         if (txq->entries && cmd_queue)
579                 for (i = 0; i < slots_num; i++)
580                         kfree(txq->entries[i].cmd);
581         kfree(txq->entries);
582         txq->entries = NULL;
583
584         return -ENOMEM;
585
586 }
587
588 int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
589                       int slots_num, bool cmd_queue)
590 {
591         int ret;
592         u32 tfd_queue_max_size =
593                 trans->trans_cfg->base_params->max_tfd_queue_size;
594
595         txq->need_update = false;
596
597         /* max_tfd_queue_size must be a power of two, otherwise
598          * iwl_queue_inc_wrap and iwl_queue_dec_wrap are broken. */
599         if (WARN_ONCE(tfd_queue_max_size & (tfd_queue_max_size - 1),
600                       "Max tfd queue size must be a power of two, but is %d",
601                       tfd_queue_max_size))
602                 return -EINVAL;
603
604         /* Initialize queue's high/low-water marks, and head/tail indexes */
605         ret = iwl_queue_init(txq, slots_num);
606         if (ret)
607                 return ret;
608
609         spin_lock_init(&txq->lock);
610
611         if (cmd_queue) {
612                 static struct lock_class_key iwl_pcie_cmd_queue_lock_class;
613
614                 lockdep_set_class(&txq->lock, &iwl_pcie_cmd_queue_lock_class);
615         }
616
617         __skb_queue_head_init(&txq->overflow_q);
618
619         return 0;
620 }
621
622 void iwl_pcie_free_tso_page(struct iwl_trans_pcie *trans_pcie,
623                             struct sk_buff *skb)
624 {
625         struct page **page_ptr;
626         struct page *next;
627
628         page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
629         next = *page_ptr;
630         *page_ptr = NULL;
631
632         while (next) {
633                 struct page *tmp = next;
634
635                 next = *(void **)(page_address(next) + PAGE_SIZE -
636                                   sizeof(void *));
637                 __free_page(tmp);
638         }
639 }
640
641 static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
642 {
643         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
644
645         lockdep_assert_held(&trans_pcie->reg_lock);
646
647         if (!trans->trans_cfg->base_params->apmg_wake_up_wa)
648                 return;
649         if (WARN_ON(!trans_pcie->cmd_hold_nic_awake))
650                 return;
651
652         trans_pcie->cmd_hold_nic_awake = false;
653         __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
654                                    CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
655 }
656
657 /*
658  * iwl_pcie_txq_unmap -  Unmap any remaining DMA mappings and free skb's
659  */
660 static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
661 {
662         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
663         struct iwl_txq *txq = trans_pcie->txq[txq_id];
664
665         spin_lock_bh(&txq->lock);
666         while (txq->write_ptr != txq->read_ptr) {
667                 IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
668                                    txq_id, txq->read_ptr);
669
670                 if (txq_id != trans_pcie->cmd_queue) {
671                         struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
672
673                         if (WARN_ON_ONCE(!skb))
674                                 continue;
675
676                         iwl_pcie_free_tso_page(trans_pcie, skb);
677                 }
678                 iwl_pcie_txq_free_tfd(trans, txq);
679                 txq->read_ptr = iwl_queue_inc_wrap(trans, txq->read_ptr);
680
681                 if (txq->read_ptr == txq->write_ptr) {
682                         unsigned long flags;
683
684                         spin_lock_irqsave(&trans_pcie->reg_lock, flags);
685                         if (txq_id == trans_pcie->cmd_queue)
686                                 iwl_pcie_clear_cmd_in_flight(trans);
687                         spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
688                 }
689         }
690
691         while (!skb_queue_empty(&txq->overflow_q)) {
692                 struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);
693
694                 iwl_op_mode_free_skb(trans->op_mode, skb);
695         }
696
697         spin_unlock_bh(&txq->lock);
698
699         /* just in case - this queue may have been stopped */
700         iwl_wake_queue(trans, txq);
701 }
702
703 /*
704  * iwl_pcie_txq_free - Deallocate DMA queue.
705  * @txq: Transmit queue to deallocate.
706  *
707  * Empty queue by removing and destroying all BD's.
708  * Free all buffers.
709  * 0-fill, but do not free "txq" descriptor structure.
710  */
711 static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
712 {
713         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
714         struct iwl_txq *txq = trans_pcie->txq[txq_id];
715         struct device *dev = trans->dev;
716         int i;
717
718         if (WARN_ON(!txq))
719                 return;
720
721         iwl_pcie_txq_unmap(trans, txq_id);
722
723         /* De-alloc array of command/tx buffers */
724         if (txq_id == trans_pcie->cmd_queue)
725                 for (i = 0; i < txq->n_window; i++) {
726                         kzfree(txq->entries[i].cmd);
727                         kzfree(txq->entries[i].free_buf);
728                 }
729
730         /* De-alloc circular buffer of TFDs */
731         if (txq->tfds) {
732                 dma_free_coherent(dev,
733                                   trans_pcie->tfd_size *
734                                   trans->trans_cfg->base_params->max_tfd_queue_size,
735                                   txq->tfds, txq->dma_addr);
736                 txq->dma_addr = 0;
737                 txq->tfds = NULL;
738
739                 dma_free_coherent(dev,
740                                   sizeof(*txq->first_tb_bufs) * txq->n_window,
741                                   txq->first_tb_bufs, txq->first_tb_dma);
742         }
743
744         kfree(txq->entries);
745         txq->entries = NULL;
746
747         del_timer_sync(&txq->stuck_timer);
748
749         /* 0-fill queue descriptor structure */
750         memset(txq, 0, sizeof(*txq));
751 }
752
753 void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr)
754 {
755         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
756         int nq = trans->trans_cfg->base_params->num_of_queues;
757         int chan;
758         u32 reg_val;
759         int clear_dwords = (SCD_TRANS_TBL_OFFSET_QUEUE(nq) -
760                                 SCD_CONTEXT_MEM_LOWER_BOUND) / sizeof(u32);
761
762         /* make sure all queues are not stopped/used */
763         memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
764         memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
765
766         trans_pcie->scd_base_addr =
767                 iwl_read_prph(trans, SCD_SRAM_BASE_ADDR);
768
769         WARN_ON(scd_base_addr != 0 &&
770                 scd_base_addr != trans_pcie->scd_base_addr);
771
772         /* reset context data, TX status and translation data */
773         iwl_trans_write_mem(trans, trans_pcie->scd_base_addr +
774                                    SCD_CONTEXT_MEM_LOWER_BOUND,
775                             NULL, clear_dwords);
776
777         iwl_write_prph(trans, SCD_DRAM_BASE_ADDR,
778                        trans_pcie->scd_bc_tbls.dma >> 10);
779
780         /* The chain extension of the SCD doesn't work well. This feature is
781          * enabled by default by the HW, so we need to disable it manually.
782          */
783         if (trans->trans_cfg->base_params->scd_chain_ext_wa)
784                 iwl_write_prph(trans, SCD_CHAINEXT_EN, 0);
785
786         iwl_trans_ac_txq_enable(trans, trans_pcie->cmd_queue,
787                                 trans_pcie->cmd_fifo,
788                                 trans_pcie->cmd_q_wdg_timeout);
789
790         /* Activate all Tx DMA/FIFO channels */
791         iwl_scd_activate_fifos(trans);
792
793         /* Enable DMA channel */
794         for (chan = 0; chan < FH_TCSR_CHNL_NUM; chan++)
795                 iwl_write_direct32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(chan),
796                                    FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE |
797                                    FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE);
798
799         /* Update FH chicken bits */
800         reg_val = iwl_read_direct32(trans, FH_TX_CHICKEN_BITS_REG);
801         iwl_write_direct32(trans, FH_TX_CHICKEN_BITS_REG,
802                            reg_val | FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN);
803
804         /* Enable L1-Active */
805         if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_8000)
806                 iwl_clear_bits_prph(trans, APMG_PCIDEV_STT_REG,
807                                     APMG_PCIDEV_STT_VAL_L1_ACT_DIS);
808 }
809
810 void iwl_trans_pcie_tx_reset(struct iwl_trans *trans)
811 {
812         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
813         int txq_id;
814
815         /*
816          * we should never get here in gen2 trans mode; return early to avoid
817          * invalid accesses
818          */
819         if (WARN_ON_ONCE(trans->trans_cfg->gen2))
820                 return;
821
822         for (txq_id = 0; txq_id < trans->trans_cfg->base_params->num_of_queues;
823              txq_id++) {
824                 struct iwl_txq *txq = trans_pcie->txq[txq_id];
825                 if (trans->trans_cfg->use_tfh)
826                         iwl_write_direct64(trans,
827                                            FH_MEM_CBBC_QUEUE(trans, txq_id),
828                                            txq->dma_addr);
829                 else
830                         iwl_write_direct32(trans,
831                                            FH_MEM_CBBC_QUEUE(trans, txq_id),
832                                            txq->dma_addr >> 8);
833                 iwl_pcie_txq_unmap(trans, txq_id);
834                 txq->read_ptr = 0;
835                 txq->write_ptr = 0;
836         }
837
838         /* Tell NIC where to find the "keep warm" buffer */
839         iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
840                            trans_pcie->kw.dma >> 4);
841
842         /*
843          * Send 0 as the scd_base_addr since the device may have been reset
844          * while we were in WoWLAN, in which case SCD_SRAM_BASE_ADDR will
845          * contain garbage.
846          */
847         iwl_pcie_tx_start(trans, 0);
848 }
849
850 static void iwl_pcie_tx_stop_fh(struct iwl_trans *trans)
851 {
852         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
853         unsigned long flags;
854         int ch, ret;
855         u32 mask = 0;
856
857         spin_lock(&trans_pcie->irq_lock);
858
859         if (!iwl_trans_grab_nic_access(trans, &flags))
860                 goto out;
861
862         /* Stop each Tx DMA channel */
863         for (ch = 0; ch < FH_TCSR_CHNL_NUM; ch++) {
864                 iwl_write32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(ch), 0x0);
865                 mask |= FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE(ch);
866         }
867
868         /* Wait for DMA channels to be idle */
869         ret = iwl_poll_bit(trans, FH_TSSR_TX_STATUS_REG, mask, mask, 5000);
870         if (ret < 0)
871                 IWL_ERR(trans,
872                         "Failing on timeout while stopping DMA channel %d [0x%08x]\n",
873                         ch, iwl_read32(trans, FH_TSSR_TX_STATUS_REG));
874
875         iwl_trans_release_nic_access(trans, &flags);
876
877 out:
878         spin_unlock(&trans_pcie->irq_lock);
879 }
880
881 /*
882  * iwl_pcie_tx_stop - Stop all Tx DMA channels
883  */
884 int iwl_pcie_tx_stop(struct iwl_trans *trans)
885 {
886         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
887         int txq_id;
888
889         /* Turn off all Tx DMA fifos */
890         iwl_scd_deactivate_fifos(trans);
891
892         /* Turn off all Tx DMA channels */
893         iwl_pcie_tx_stop_fh(trans);
894
895         /*
896          * This function can be called before the op_mode has disabled the
897          * queues. This happens when we have an rfkill interrupt.
898          * Since we stop Tx altogether - mark the queues as stopped.
899          */
900         memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
901         memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
902
903         /* This can happen: start_hw, stop_device */
904         if (!trans_pcie->txq_memory)
905                 return 0;
906
907         /* Unmap DMA from host system and free skb's */
908         for (txq_id = 0; txq_id < trans->trans_cfg->base_params->num_of_queues;
909              txq_id++)
910                 iwl_pcie_txq_unmap(trans, txq_id);
911
912         return 0;
913 }
914
915 /*
916  * iwl_trans_tx_free - Free TXQ Context
917  *
918  * Destroy all TX DMA queues and structures
919  */
920 void iwl_pcie_tx_free(struct iwl_trans *trans)
921 {
922         int txq_id;
923         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
924
925         memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
926
927         /* Tx queues */
928         if (trans_pcie->txq_memory) {
929                 for (txq_id = 0;
930                      txq_id < trans->trans_cfg->base_params->num_of_queues;
931                      txq_id++) {
932                         iwl_pcie_txq_free(trans, txq_id);
933                         trans_pcie->txq[txq_id] = NULL;
934                 }
935         }
936
937         kfree(trans_pcie->txq_memory);
938         trans_pcie->txq_memory = NULL;
939
940         iwl_pcie_free_dma_ptr(trans, &trans_pcie->kw);
941
942         iwl_pcie_free_dma_ptr(trans, &trans_pcie->scd_bc_tbls);
943 }
944
945 /*
946  * iwl_pcie_tx_alloc - allocate TX context
947  * Allocate all Tx DMA structures and initialize them
948  */
949 static int iwl_pcie_tx_alloc(struct iwl_trans *trans)
950 {
951         int ret;
952         int txq_id, slots_num;
953         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
954         u16 bc_tbls_size = trans->trans_cfg->base_params->num_of_queues;
955
956         if (WARN_ON(trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210))
957                 return -EINVAL;
958
959         bc_tbls_size *= sizeof(struct iwlagn_scd_bc_tbl);
960
961         /* It is not allowed to alloc twice, so warn when this happens.
962          * We cannot rely on the previous allocation, so free and fail */
963         if (WARN_ON(trans_pcie->txq_memory)) {
964                 ret = -EINVAL;
965                 goto error;
966         }
967
968         ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->scd_bc_tbls,
969                                      bc_tbls_size);
970         if (ret) {
971                 IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
972                 goto error;
973         }
974
975         /* Alloc keep-warm buffer */
976         ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->kw, IWL_KW_SIZE);
977         if (ret) {
978                 IWL_ERR(trans, "Keep Warm allocation failed\n");
979                 goto error;
980         }
981
982         trans_pcie->txq_memory =
983                 kcalloc(trans->trans_cfg->base_params->num_of_queues,
984                         sizeof(struct iwl_txq), GFP_KERNEL);
985         if (!trans_pcie->txq_memory) {
986                 IWL_ERR(trans, "Not enough memory for txq\n");
987                 ret = -ENOMEM;
988                 goto error;
989         }
990
991         /* Alloc and init all Tx queues, including the command queue (#4/#9) */
992         for (txq_id = 0; txq_id < trans->trans_cfg->base_params->num_of_queues;
993              txq_id++) {
994                 bool cmd_queue = (txq_id == trans_pcie->cmd_queue);
995
996                 if (cmd_queue)
997                         slots_num = max_t(u32, IWL_CMD_QUEUE_SIZE,
998                                           trans->cfg->min_txq_size);
999                 else
1000                         slots_num = max_t(u32, IWL_DEFAULT_QUEUE_SIZE,
1001                                           trans->cfg->min_256_ba_txq_size);
1002                 trans_pcie->txq[txq_id] = &trans_pcie->txq_memory[txq_id];
1003                 ret = iwl_pcie_txq_alloc(trans, trans_pcie->txq[txq_id],
1004                                          slots_num, cmd_queue);
1005                 if (ret) {
1006                         IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
1007                         goto error;
1008                 }
1009                 trans_pcie->txq[txq_id]->id = txq_id;
1010         }
1011
1012         return 0;
1013
1014 error:
1015         iwl_pcie_tx_free(trans);
1016
1017         return ret;
1018 }
1019
1020 int iwl_pcie_tx_init(struct iwl_trans *trans)
1021 {
1022         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1023         int ret;
1024         int txq_id, slots_num;
1025         bool alloc = false;
1026
1027         if (!trans_pcie->txq_memory) {
1028                 ret = iwl_pcie_tx_alloc(trans);
1029                 if (ret)
1030                         goto error;
1031                 alloc = true;
1032         }
1033
1034         spin_lock(&trans_pcie->irq_lock);
1035
1036         /* Turn off all Tx DMA fifos */
1037         iwl_scd_deactivate_fifos(trans);
1038
1039         /* Tell NIC where to find the "keep warm" buffer */
1040         iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
1041                            trans_pcie->kw.dma >> 4);
1042
1043         spin_unlock(&trans_pcie->irq_lock);
1044
1045         /* Alloc and init all Tx queues, including the command queue (#4/#9) */
1046         for (txq_id = 0; txq_id < trans->trans_cfg->base_params->num_of_queues;
1047              txq_id++) {
1048                 bool cmd_queue = (txq_id == trans_pcie->cmd_queue);
1049
1050                 if (cmd_queue)
1051                         slots_num = max_t(u32, IWL_CMD_QUEUE_SIZE,
1052                                           trans->cfg->min_txq_size);
1053                 else
1054                         slots_num = max_t(u32, IWL_DEFAULT_QUEUE_SIZE,
1055                                           trans->cfg->min_256_ba_txq_size);
1056                 ret = iwl_pcie_txq_init(trans, trans_pcie->txq[txq_id],
1057                                         slots_num, cmd_queue);
1058                 if (ret) {
1059                         IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
1060                         goto error;
1061                 }
1062
1063                 /*
1064                  * Tell the NIC where to find the circular buffer of TFDs for a
1065                  * given Tx queue, and enable the DMA channel used for that
1066                  * queue.
1067                  * Circular buffer (TFD queue in DRAM) physical base address
1068                  */
1069                 iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id),
1070                                    trans_pcie->txq[txq_id]->dma_addr >> 8);
1071         }
1072
1073         iwl_set_bits_prph(trans, SCD_GP_CTRL, SCD_GP_CTRL_AUTO_ACTIVE_MODE);
1074         if (trans->trans_cfg->base_params->num_of_queues > 20)
1075                 iwl_set_bits_prph(trans, SCD_GP_CTRL,
1076                                   SCD_GP_CTRL_ENABLE_31_QUEUES);
1077
1078         return 0;
1079 error:
1080         /* Upon error, free only if we allocated something */
1081         if (alloc)
1082                 iwl_pcie_tx_free(trans);
1083         return ret;
1084 }
1085
1086 static inline void iwl_pcie_txq_progress(struct iwl_txq *txq)
1087 {
1088         lockdep_assert_held(&txq->lock);
1089
1090         if (!txq->wd_timeout)
1091                 return;
1092
1093         /*
1094          * station is asleep and we send data - that must
1095          * be uAPSD or PS-Poll. Don't rearm the timer.
1096          */
1097         if (txq->frozen)
1098                 return;
1099
1100         /*
1101          * if empty delete timer, otherwise move timer forward
1102          * since we're making progress on this queue
1103          */
1104         if (txq->read_ptr == txq->write_ptr)
1105                 del_timer(&txq->stuck_timer);
1106         else
1107                 mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1108 }
1109
1110 /* Frees buffers until index _not_ inclusive */
1111 void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
1112                             struct sk_buff_head *skbs)
1113 {
1114         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1115         struct iwl_txq *txq = trans_pcie->txq[txq_id];
1116         int tfd_num = iwl_pcie_get_cmd_index(txq, ssn);
1117         int read_ptr = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
1118         int last_to_free;
1119
1120         /* This function is not meant to release the cmd queue */
1121         if (WARN_ON(txq_id == trans_pcie->cmd_queue))
1122                 return;
1123
1124         spin_lock_bh(&txq->lock);
1125
1126         if (!test_bit(txq_id, trans_pcie->queue_used)) {
1127                 IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n",
1128                                     txq_id, ssn);
1129                 goto out;
1130         }
1131
1132         if (read_ptr == tfd_num)
1133                 goto out;
1134
1135         IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n",
1136                            txq_id, txq->read_ptr, tfd_num, ssn);
1137
1138         /* Since we free until index _not_ inclusive, the one before index is
1139          * the last we will free; that entry must currently be in use */
1140         last_to_free = iwl_queue_dec_wrap(trans, tfd_num);
1141
1142         if (!iwl_queue_used(txq, last_to_free)) {
1143                 IWL_ERR(trans,
1144                         "%s: Read index for txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
1145                         __func__, txq_id, last_to_free,
1146                         trans->trans_cfg->base_params->max_tfd_queue_size,
1147                         txq->write_ptr, txq->read_ptr);
1148                 goto out;
1149         }
1150
1151         if (WARN_ON(!skb_queue_empty(skbs)))
1152                 goto out;
1153
1154         for (;
1155              read_ptr != tfd_num;
1156              txq->read_ptr = iwl_queue_inc_wrap(trans, txq->read_ptr),
1157              read_ptr = iwl_pcie_get_cmd_index(txq, txq->read_ptr)) {
1158                 struct sk_buff *skb = txq->entries[read_ptr].skb;
1159
1160                 if (WARN_ON_ONCE(!skb))
1161                         continue;
1162
1163                 iwl_pcie_free_tso_page(trans_pcie, skb);
1164
1165                 __skb_queue_tail(skbs, skb);
1166
1167                 txq->entries[read_ptr].skb = NULL;
1168
1169                 if (!trans->trans_cfg->use_tfh)
1170                         iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
1171
1172                 iwl_pcie_txq_free_tfd(trans, txq);
1173         }
1174
1175         iwl_pcie_txq_progress(txq);
1176
1177         if (iwl_queue_space(trans, txq) > txq->low_mark &&
1178             test_bit(txq_id, trans_pcie->queue_stopped)) {
1179                 struct sk_buff_head overflow_skbs;
1180
1181                 __skb_queue_head_init(&overflow_skbs);
1182                 skb_queue_splice_init(&txq->overflow_q, &overflow_skbs);
1183
1184                 /*
1185                  * We are going to transmit from the overflow queue.
1186                  * Remember this state so that wait_for_txq_empty will know we
1187                  * are adding more packets to the TFD queue. It cannot rely on
1188                  * the state of &txq->overflow_q, as we just emptied it, but
1189                  * haven't TXed the content yet.
1190                  */
1191                 txq->overflow_tx = true;
1192
1193                 /*
1194                  * This is tricky: we are in the reclaim path, which is not
1195                  * re-entrant, so no one will try to access the txq data
1196                  * from that path. Tx is stopped, so the tx path cannot
1197                  * touch it either. Bottom line, we can unlock and re-lock
1198                  * later.
1199                  */
1200                 spin_unlock_bh(&txq->lock);
1201
1202                 while (!skb_queue_empty(&overflow_skbs)) {
1203                         struct sk_buff *skb = __skb_dequeue(&overflow_skbs);
1204                         struct iwl_device_tx_cmd *dev_cmd_ptr;
1205
1206                         dev_cmd_ptr = *(void **)((u8 *)skb->cb +
1207                                                  trans_pcie->dev_cmd_offs);
1208
1209                         /*
1210                          * Note that we can very well be overflowing again.
1211                          * In that case, iwl_queue_space will be small again
1212                          * and we won't wake mac80211's queue.
1213                          */
1214                         iwl_trans_tx(trans, skb, dev_cmd_ptr, txq_id);
1215                 }
1216
1217                 if (iwl_queue_space(trans, txq) > txq->low_mark)
1218                         iwl_wake_queue(trans, txq);
1219
1220                 spin_lock_bh(&txq->lock);
1221                 txq->overflow_tx = false;
1222         }
1223
1224 out:
1225         spin_unlock_bh(&txq->lock);
1226 }
1227
1228 /* Set wr_ptr of specific device and txq  */
1229 void iwl_trans_pcie_set_q_ptrs(struct iwl_trans *trans, int txq_id, int ptr)
1230 {
1231         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1232         struct iwl_txq *txq = trans_pcie->txq[txq_id];
1233
1234         spin_lock_bh(&txq->lock);
1235
1236         txq->write_ptr = ptr;
1237         txq->read_ptr = txq->write_ptr;
1238
1239         spin_unlock_bh(&txq->lock);
1240 }
1241
1242 static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
1243                                       const struct iwl_host_cmd *cmd)
1244 {
1245         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1246         int ret;
1247
1248         lockdep_assert_held(&trans_pcie->reg_lock);
1249
1250         /* Make sure the NIC is still alive in the bus */
1251         if (test_bit(STATUS_TRANS_DEAD, &trans->status))
1252                 return -ENODEV;
1253
1254         /*
1255          * wake up the NIC to make sure that the firmware will see the host
1256          * command - we will let the NIC sleep once all the host commands
1257          * have returned. This needs to be done only on NICs that have
1258          * apmg_wake_up_wa set.
1259          */
1260         if (trans->trans_cfg->base_params->apmg_wake_up_wa &&
1261             !trans_pcie->cmd_hold_nic_awake) {
1262                 __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
1263                                          CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
1264
1265                 ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
1266                                    CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN,
1267                                    (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
1268                                     CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP),
1269                                    15000);
1270                 if (ret < 0) {
1271                         __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
1272                                         CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
1273                         IWL_ERR(trans, "Failed to wake NIC for hcmd\n");
1274                         return -EIO;
1275                 }
1276                 trans_pcie->cmd_hold_nic_awake = true;
1277         }
1278
1279         return 0;
1280 }
1281
1282 /*
1283  * iwl_pcie_cmdq_reclaim - Reclaim TX command queue entries already Tx'd
1284  *
1285  * When FW advances 'R' index, all entries between old and new 'R' index
1286  * need to be reclaimed. As a result, some free space forms. If there is
1287  * enough free space (> low mark), wake the stack that feeds us.
1288  */
1289 static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
1290 {
1291         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1292         struct iwl_txq *txq = trans_pcie->txq[txq_id];
1293         unsigned long flags;
1294         int nfreed = 0;
1295         u16 r;
1296
1297         lockdep_assert_held(&txq->lock);
1298
1299         idx = iwl_pcie_get_cmd_index(txq, idx);
1300         r = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
1301
1302         if (idx >= trans->trans_cfg->base_params->max_tfd_queue_size ||
1303             (!iwl_queue_used(txq, idx))) {
1304                 WARN_ONCE(test_bit(txq_id, trans_pcie->queue_used),
1305                           "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
1306                           __func__, txq_id, idx,
1307                           trans->trans_cfg->base_params->max_tfd_queue_size,
1308                           txq->write_ptr, txq->read_ptr);
1309                 return;
1310         }
1311
1312         for (idx = iwl_queue_inc_wrap(trans, idx); r != idx;
1313              r = iwl_queue_inc_wrap(trans, r)) {
1314                 txq->read_ptr = iwl_queue_inc_wrap(trans, txq->read_ptr);
1315
1316                 if (nfreed++ > 0) {
1317                         IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n",
1318                                 idx, txq->write_ptr, r);
1319                         iwl_force_nmi(trans);
1320                 }
1321         }
1322
1323         if (txq->read_ptr == txq->write_ptr) {
1324                 spin_lock_irqsave(&trans_pcie->reg_lock, flags);
1325                 iwl_pcie_clear_cmd_in_flight(trans);
1326                 spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1327         }
1328
1329         iwl_pcie_txq_progress(txq);
1330 }
1331
1332 static int iwl_pcie_txq_set_ratid_map(struct iwl_trans *trans, u16 ra_tid,
1333                                  u16 txq_id)
1334 {
1335         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1336         u32 tbl_dw_addr;
1337         u32 tbl_dw;
1338         u16 scd_q2ratid;
1339
1340         scd_q2ratid = ra_tid & SCD_QUEUE_RA_TID_MAP_RATID_MSK;
1341
1342         tbl_dw_addr = trans_pcie->scd_base_addr +
1343                         SCD_TRANS_TBL_OFFSET_QUEUE(txq_id);
1344
1345         tbl_dw = iwl_trans_read_mem32(trans, tbl_dw_addr);
1346
1347         if (txq_id & 0x1)
1348                 tbl_dw = (scd_q2ratid << 16) | (tbl_dw & 0x0000FFFF);
1349         else
1350                 tbl_dw = scd_q2ratid | (tbl_dw & 0xFFFF0000);
1351
1352         iwl_trans_write_mem32(trans, tbl_dw_addr, tbl_dw);
1353
1354         return 0;
1355 }
1356
1357 /* Receiver address (actually, Rx station's index into station table),
1358  * combined with Traffic ID (QOS priority), in format used by Tx Scheduler */
1359 #define BUILD_RAxTID(sta_id, tid)       (((sta_id) << 4) + (tid))
1360
1361 bool iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
1362                                const struct iwl_trans_txq_scd_cfg *cfg,
1363                                unsigned int wdg_timeout)
1364 {
1365         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1366         struct iwl_txq *txq = trans_pcie->txq[txq_id];
1367         int fifo = -1;
1368         bool scd_bug = false;
1369
1370         if (test_and_set_bit(txq_id, trans_pcie->queue_used))
1371                 WARN_ONCE(1, "queue %d already used - expect issues", txq_id);
1372
1373         txq->wd_timeout = msecs_to_jiffies(wdg_timeout);
1374
1375         if (cfg) {
1376                 fifo = cfg->fifo;
1377
1378                 /* Disable the scheduler prior to configuring the cmd queue */
1379                 if (txq_id == trans_pcie->cmd_queue &&
1380                     trans_pcie->scd_set_active)
1381                         iwl_scd_enable_set_active(trans, 0);
1382
1383                 /* Stop this Tx queue before configuring it */
1384                 iwl_scd_txq_set_inactive(trans, txq_id);
1385
1386                 /* Set this queue as a chain-building queue unless it is CMD */
1387                 if (txq_id != trans_pcie->cmd_queue)
1388                         iwl_scd_txq_set_chain(trans, txq_id);
1389
1390                 if (cfg->aggregate) {
1391                         u16 ra_tid = BUILD_RAxTID(cfg->sta_id, cfg->tid);
1392
1393                         /* Map receiver-address / traffic-ID to this queue */
1394                         iwl_pcie_txq_set_ratid_map(trans, ra_tid, txq_id);
1395
1396                         /* enable aggregations for the queue */
1397                         iwl_scd_txq_enable_agg(trans, txq_id);
1398                         txq->ampdu = true;
1399                 } else {
1400                         /*
1401                          * disable aggregations for the queue, this will also
1402                          * make the ra_tid mapping configuration irrelevant
1403                          * since it is now a non-AGG queue.
1404                          */
1405                         iwl_scd_txq_disable_agg(trans, txq_id);
1406
1407                         ssn = txq->read_ptr;
1408                 }
1409         } else {
1410                 /*
1411                  * If we need to move the SCD write pointer by steps of
1412                  * 0x40, 0x80 or 0xc0, it gets stuck. Avoid this and let
1413                  * the op_mode know by returning true later.
1414                  * Do this only in case cfg is NULL since this trick can
1415                  * be done only if we have DQA enabled which is true for mvm
1416                  * only. And mvm never sets a cfg pointer.
1417                  * This is really ugly, but this is the easiest way out for
1418                  * this sad hardware issue.
1419                  * This bug has been fixed on devices 9000 and up.
1420                  */
1421                 scd_bug = !trans->trans_cfg->mq_rx_supported &&
1422                         !((ssn - txq->write_ptr) & 0x3f) &&
1423                         (ssn != txq->write_ptr);
1424                 if (scd_bug)
1425                         ssn++;
1426         }
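
        /*
         * Worked example with illustrative numbers: with write_ptr == 0x20
         * and ssn == 0x60 on a device without multi-queue RX, the
         * difference is 0x40, so ((ssn - write_ptr) & 0x3f) == 0 while
         * ssn != write_ptr; scd_bug is set, ssn is bumped to 0x61 and the
         * op_mode learns about the skipped entry from the return value.
         */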
1427
1428         /* Place first TFD at index corresponding to start sequence number.
1429          * Assumes that ssn is valid (!= 0xFFF) */
1430         txq->read_ptr = (ssn & 0xff);
1431         txq->write_ptr = (ssn & 0xff);
1432         iwl_write_direct32(trans, HBUS_TARG_WRPTR,
1433                            (ssn & 0xff) | (txq_id << 8));
1434
1435         if (cfg) {
1436                 u8 frame_limit = cfg->frame_limit;
1437
1438                 iwl_write_prph(trans, SCD_QUEUE_RDPTR(txq_id), ssn);
1439
1440                 /* Set up Tx window size and frame limit for this queue */
1441                 iwl_trans_write_mem32(trans, trans_pcie->scd_base_addr +
1442                                 SCD_CONTEXT_QUEUE_OFFSET(txq_id), 0);
1443                 iwl_trans_write_mem32(trans,
1444                         trans_pcie->scd_base_addr +
1445                         SCD_CONTEXT_QUEUE_OFFSET(txq_id) + sizeof(u32),
1446                         SCD_QUEUE_CTX_REG2_VAL(WIN_SIZE, frame_limit) |
1447                         SCD_QUEUE_CTX_REG2_VAL(FRAME_LIMIT, frame_limit));
1448
1449                 /* Set up status area in SRAM, map to Tx DMA/FIFO, activate */
1450                 iwl_write_prph(trans, SCD_QUEUE_STATUS_BITS(txq_id),
1451                                (1 << SCD_QUEUE_STTS_REG_POS_ACTIVE) |
1452                                (cfg->fifo << SCD_QUEUE_STTS_REG_POS_TXF) |
1453                                (1 << SCD_QUEUE_STTS_REG_POS_WSL) |
1454                                SCD_QUEUE_STTS_REG_MSK);
1455
1456                 /* enable the scheduler for this queue (only) */
1457                 if (txq_id == trans_pcie->cmd_queue &&
1458                     trans_pcie->scd_set_active)
1459                         iwl_scd_enable_set_active(trans, BIT(txq_id));
1460
1461                 IWL_DEBUG_TX_QUEUES(trans,
1462                                     "Activate queue %d on FIFO %d WrPtr: %d\n",
1463                                     txq_id, fifo, ssn & 0xff);
1464         } else {
1465                 IWL_DEBUG_TX_QUEUES(trans,
1466                                     "Activate queue %d WrPtr: %d\n",
1467                                     txq_id, ssn & 0xff);
1468         }
1469
1470         return scd_bug;
1471 }
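
/*
 * Illustrative caller sketch (hypothetical values; op_modes normally reach
 * this through the transport ops rather than calling it directly).  A true
 * return value means the SCD write-pointer workaround above kicked in and
 * one TFD entry was skipped:
 *
 *	struct iwl_trans_txq_scd_cfg cfg = {
 *		.fifo = 1,
 *		.sta_id = 3,
 *		.tid = 5,
 *		.aggregate = true,
 *		.frame_limit = 64,
 *	};
 *	bool scd_bug = iwl_trans_pcie_txq_enable(trans, txq_id, ssn,
 *						 &cfg, wdg_timeout);
 */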
1472
1473 void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id,
1474                                         bool shared_mode)
1475 {
1476         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1477         struct iwl_txq *txq = trans_pcie->txq[txq_id];
1478
1479         txq->ampdu = !shared_mode;
1480 }
1481
1482 void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
1483                                 bool configure_scd)
1484 {
1485         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1486         u32 stts_addr = trans_pcie->scd_base_addr +
1487                         SCD_TX_STTS_QUEUE_OFFSET(txq_id);
1488         static const u32 zero_val[4] = {};
1489
1490         trans_pcie->txq[txq_id]->frozen_expiry_remainder = 0;
1491         trans_pcie->txq[txq_id]->frozen = false;
1492
1493         /*
1494          * Upon HW Rfkill - we stop the device, and then stop the queues
1495          * in the op_mode. For the sake of keeping the op_mode simple,
1496          * allow the op_mode to call txq_disable after it already called
1497          * stop_device.
1498          */
1499         if (!test_and_clear_bit(txq_id, trans_pcie->queue_used)) {
1500                 WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
1501                           "queue %d not used", txq_id);
1502                 return;
1503         }
1504
1505         if (configure_scd) {
1506                 iwl_scd_txq_set_inactive(trans, txq_id);
1507
1508                 iwl_trans_write_mem(trans, stts_addr, (void *)zero_val,
1509                                     ARRAY_SIZE(zero_val));
1510         }
1511
1512         iwl_pcie_txq_unmap(trans, txq_id);
1513         trans_pcie->txq[txq_id]->ampdu = false;
1514
1515         IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", txq_id);
1516 }
1517
1518 /*************** HOST COMMAND QUEUE FUNCTIONS   *****/
1519
1520 /*
1521  * iwl_pcie_enqueue_hcmd - enqueue a uCode command
1522  * @trans: transport layer private data
1523  * @cmd: a pointer to the ucode command structure
1524  *
1525  * The function returns < 0 values to indicate the operation
1526  * failed. On success, it returns the index (>= 0) of the command in the
1527  * command queue.
1528  */
1529 static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
1530                                  struct iwl_host_cmd *cmd)
1531 {
1532         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1533         struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
1534         struct iwl_device_cmd *out_cmd;
1535         struct iwl_cmd_meta *out_meta;
1536         unsigned long flags;
1537         void *dup_buf = NULL;
1538         dma_addr_t phys_addr;
1539         int idx;
1540         u16 copy_size, cmd_size, tb0_size;
1541         bool had_nocopy = false;
1542         u8 group_id = iwl_cmd_groupid(cmd->id);
1543         int i, ret;
1544         u32 cmd_pos;
1545         const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
1546         u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
1547
1548         if (WARN(!trans->wide_cmd_header &&
1549                  group_id > IWL_ALWAYS_LONG_GROUP,
1550                  "unsupported wide command %#x\n", cmd->id))
1551                 return -EINVAL;
1552
1553         if (group_id != 0) {
1554                 copy_size = sizeof(struct iwl_cmd_header_wide);
1555                 cmd_size = sizeof(struct iwl_cmd_header_wide);
1556         } else {
1557                 copy_size = sizeof(struct iwl_cmd_header);
1558                 cmd_size = sizeof(struct iwl_cmd_header);
1559         }
1560
1561         /* need one for the header if the first is NOCOPY */
1562         BUILD_BUG_ON(IWL_MAX_CMD_TBS_PER_TFD > IWL_NUM_OF_TBS - 1);
1563
1564         for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1565                 cmddata[i] = cmd->data[i];
1566                 cmdlen[i] = cmd->len[i];
1567
1568                 if (!cmd->len[i])
1569                         continue;
1570
1571                 /* need at least IWL_FIRST_TB_SIZE copied */
1572                 if (copy_size < IWL_FIRST_TB_SIZE) {
1573                         int copy = IWL_FIRST_TB_SIZE - copy_size;
1574
1575                         if (copy > cmdlen[i])
1576                                 copy = cmdlen[i];
1577                         cmdlen[i] -= copy;
1578                         cmddata[i] += copy;
1579                         copy_size += copy;
1580                 }
1581
1582                 if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
1583                         had_nocopy = true;
1584                         if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
1585                                 idx = -EINVAL;
1586                                 goto free_dup_buf;
1587                         }
1588                 } else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
1589                         /*
1590                          * This is also a chunk that isn't copied
1591                          * to the static buffer so set had_nocopy.
1592                          */
1593                         had_nocopy = true;
1594
1595                         /* only allowed once */
1596                         if (WARN_ON(dup_buf)) {
1597                                 idx = -EINVAL;
1598                                 goto free_dup_buf;
1599                         }
1600
1601                         dup_buf = kmemdup(cmddata[i], cmdlen[i],
1602                                           GFP_ATOMIC);
1603                         if (!dup_buf)
1604                                 return -ENOMEM;
1605                 } else {
1606                         /* NOCOPY must not be followed by normal! */
1607                         if (WARN_ON(had_nocopy)) {
1608                                 idx = -EINVAL;
1609                                 goto free_dup_buf;
1610                         }
1611                         copy_size += cmdlen[i];
1612                 }
1613                 cmd_size += cmd->len[i];
1614         }
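
        /*
         * At this point cmd_size is the logical size of the full command,
         * while copy_size counts the bytes that will be copied into the
         * driver's command buffer: the header, all normal chunks, and the
         * first few bytes of a NOCOPY/DUP chunk that get pulled in to fill
         * TB0.  The TFD built below is laid out roughly as follows: TB0
         * holds the first IWL_FIRST_TB_SIZE bytes of the copied data
         * (bi-directional DMA), a second TB maps the remainder of the
         * copied data (if any), and each remaining NOCOPY/DUP chunk gets a
         * TB of its own.
         */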
1615
1616         /*
1617          * If any of the command structures end up being larger than
1618          * the TFD_MAX_PAYLOAD_SIZE and they aren't dynamically
1619          * allocated into separate TFDs, then we will need to
1620          * increase the size of the buffers.
1621          */
1622         if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
1623                  "Command %s (%#x) is too large (%d bytes)\n",
1624                  iwl_get_cmd_string(trans, cmd->id),
1625                  cmd->id, copy_size)) {
1626                 idx = -EINVAL;
1627                 goto free_dup_buf;
1628         }
1629
1630         spin_lock_bh(&txq->lock);
1631
1632         if (iwl_queue_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
1633                 spin_unlock_bh(&txq->lock);
1634
1635                 IWL_ERR(trans, "No space in command queue\n");
1636                 iwl_op_mode_cmd_queue_full(trans->op_mode);
1637                 idx = -ENOSPC;
1638                 goto free_dup_buf;
1639         }
1640
1641         idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
1642         out_cmd = txq->entries[idx].cmd;
1643         out_meta = &txq->entries[idx].meta;
1644
1645         memset(out_meta, 0, sizeof(*out_meta)); /* re-initialize to NULL */
1646         if (cmd->flags & CMD_WANT_SKB)
1647                 out_meta->source = cmd;
1648
1649         /* set up the header */
1650         if (group_id != 0) {
1651                 out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
1652                 out_cmd->hdr_wide.group_id = group_id;
1653                 out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
1654                 out_cmd->hdr_wide.length =
1655                         cpu_to_le16(cmd_size -
1656                                     sizeof(struct iwl_cmd_header_wide));
1657                 out_cmd->hdr_wide.reserved = 0;
1658                 out_cmd->hdr_wide.sequence =
1659                         cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
1660                                                  INDEX_TO_SEQ(txq->write_ptr));
1661
1662                 cmd_pos = sizeof(struct iwl_cmd_header_wide);
1663                 copy_size = sizeof(struct iwl_cmd_header_wide);
1664         } else {
1665                 out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id);
1666                 out_cmd->hdr.sequence =
1667                         cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
1668                                                  INDEX_TO_SEQ(txq->write_ptr));
1669                 out_cmd->hdr.group_id = 0;
1670
1671                 cmd_pos = sizeof(struct iwl_cmd_header);
1672                 copy_size = sizeof(struct iwl_cmd_header);
1673         }
1674
1675         /* and copy the data that needs to be copied */
1676         for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1677                 int copy;
1678
1679                 if (!cmd->len[i])
1680                         continue;
1681
1682                 /* copy everything if not nocopy/dup */
1683                 if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1684                                            IWL_HCMD_DFL_DUP))) {
1685                         copy = cmd->len[i];
1686
1687                         memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1688                         cmd_pos += copy;
1689                         copy_size += copy;
1690                         continue;
1691                 }
1692
1693                 /*
1694                  * Otherwise we need at least IWL_FIRST_TB_SIZE copied
1695                  * in total (for bi-directional DMA), but copy up to what
1696                  * we can fit into the payload for debug dump purposes.
1697                  */
1698                 copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);
1699
1700                 memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1701                 cmd_pos += copy;
1702
1703                 /* However, keep copy_size accurate since we need it below */
1704                 if (copy_size < IWL_FIRST_TB_SIZE) {
1705                         copy = IWL_FIRST_TB_SIZE - copy_size;
1706
1707                         if (copy > cmd->len[i])
1708                                 copy = cmd->len[i];
1709                         copy_size += copy;
1710                 }
1711         }
1712
1713         IWL_DEBUG_HC(trans,
1714                      "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
1715                      iwl_get_cmd_string(trans, cmd->id),
1716                      group_id, out_cmd->hdr.cmd,
1717                      le16_to_cpu(out_cmd->hdr.sequence),
1718                      cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue);
1719
1720         /* start the TFD with the minimum copy bytes */
1721         tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
1722         memcpy(&txq->first_tb_bufs[idx], &out_cmd->hdr, tb0_size);
1723         iwl_pcie_txq_build_tfd(trans, txq,
1724                                iwl_pcie_get_first_tb_dma(txq, idx),
1725                                tb0_size, true);
1726
1727         /* map first command fragment, if any remains */
1728         if (copy_size > tb0_size) {
1729                 phys_addr = dma_map_single(trans->dev,
1730                                            ((u8 *)&out_cmd->hdr) + tb0_size,
1731                                            copy_size - tb0_size,
1732                                            DMA_TO_DEVICE);
1733                 if (dma_mapping_error(trans->dev, phys_addr)) {
1734                         iwl_pcie_tfd_unmap(trans, out_meta, txq,
1735                                            txq->write_ptr);
1736                         idx = -ENOMEM;
1737                         goto out;
1738                 }
1739
1740                 iwl_pcie_txq_build_tfd(trans, txq, phys_addr,
1741                                        copy_size - tb0_size, false);
1742         }
1743
1744         /* map the remaining (adjusted) nocopy/dup fragments */
1745         for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1746                 const void *data = cmddata[i];
1747
1748                 if (!cmdlen[i])
1749                         continue;
1750                 if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1751                                            IWL_HCMD_DFL_DUP)))
1752                         continue;
1753                 if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
1754                         data = dup_buf;
1755                 phys_addr = dma_map_single(trans->dev, (void *)data,
1756                                            cmdlen[i], DMA_TO_DEVICE);
1757                 if (dma_mapping_error(trans->dev, phys_addr)) {
1758                         iwl_pcie_tfd_unmap(trans, out_meta, txq,
1759                                            txq->write_ptr);
1760                         idx = -ENOMEM;
1761                         goto out;
1762                 }
1763
1764                 iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false);
1765         }
1766
1767         BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
1768         out_meta->flags = cmd->flags;
1769         if (WARN_ON_ONCE(txq->entries[idx].free_buf))
1770                 kzfree(txq->entries[idx].free_buf);
1771         txq->entries[idx].free_buf = dup_buf;
1772
1773         trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);
1774
1775         /* start timer if queue currently empty */
1776         if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
1777                 mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1778
1779         spin_lock_irqsave(&trans_pcie->reg_lock, flags);
1780         ret = iwl_pcie_set_cmd_in_flight(trans, cmd);
1781         if (ret < 0) {
1782                 idx = ret;
1783                 spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1784                 goto out;
1785         }
1786
1787         /* Increment and update queue's write index */
1788         txq->write_ptr = iwl_queue_inc_wrap(trans, txq->write_ptr);
1789         iwl_pcie_txq_inc_wr_ptr(trans, txq);
1790
1791         spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1792
1793  out:
1794         spin_unlock_bh(&txq->lock);
1795  free_dup_buf:
1796         if (idx < 0)
1797                 kfree(dup_buf);
1798         return idx;
1799 }
1800
1801 /*
1802  * iwl_pcie_hcmd_complete - Pull unused buffers off the queue and reclaim them
1803  * @rxb: Rx buffer to reclaim
1804  */
1805 void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
1806                             struct iwl_rx_cmd_buffer *rxb)
1807 {
1808         struct iwl_rx_packet *pkt = rxb_addr(rxb);
1809         u16 sequence = le16_to_cpu(pkt->hdr.sequence);
1810         u8 group_id;
1811         u32 cmd_id;
1812         int txq_id = SEQ_TO_QUEUE(sequence);
1813         int index = SEQ_TO_INDEX(sequence);
1814         int cmd_index;
1815         struct iwl_device_cmd *cmd;
1816         struct iwl_cmd_meta *meta;
1817         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1818         struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
1819
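        /*
         * The sequence field round-trips the queue and index that
         * iwl_pcie_enqueue_hcmd() packed in with QUEUE_TO_SEQ() and
         * INDEX_TO_SEQ(), so SEQ_TO_QUEUE()/SEQ_TO_INDEX() above recover
         * exactly which command-queue entry this response completes.
         */
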
1820         /* If a Tx command is being handled and it isn't in the actual
1821          * command queue, then a command routing bug has been introduced
1822          * in the queue management code. */
1823         if (WARN(txq_id != trans_pcie->cmd_queue,
1824                  "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d\n",
1825                  txq_id, trans_pcie->cmd_queue, sequence, txq->read_ptr,
1826                  txq->write_ptr)) {
1827                 iwl_print_hex_error(trans, pkt, 32);
1828                 return;
1829         }
1830
1831         spin_lock_bh(&txq->lock);
1832
1833         cmd_index = iwl_pcie_get_cmd_index(txq, index);
1834         cmd = txq->entries[cmd_index].cmd;
1835         meta = &txq->entries[cmd_index].meta;
1836         group_id = cmd->hdr.group_id;
1837         cmd_id = iwl_cmd_id(cmd->hdr.cmd, group_id, 0);
1838
1839         iwl_pcie_tfd_unmap(trans, meta, txq, index);
1840
1841         /* Input error checking is done when commands are added to queue. */
1842         if (meta->flags & CMD_WANT_SKB) {
1843                 struct page *p = rxb_steal_page(rxb);
1844
1845                 meta->source->resp_pkt = pkt;
1846                 meta->source->_rx_page_addr = (unsigned long)page_address(p);
1847                 meta->source->_rx_page_order = trans_pcie->rx_page_order;
1848         }
1849
1850         if (meta->flags & CMD_WANT_ASYNC_CALLBACK)
1851                 iwl_op_mode_async_cb(trans->op_mode, cmd);
1852
1853         iwl_pcie_cmdq_reclaim(trans, txq_id, index);
1854
1855         if (!(meta->flags & CMD_ASYNC)) {
1856                 if (!test_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status)) {
1857                         IWL_WARN(trans,
1858                                  "HCMD_ACTIVE already clear for command %s\n",
1859                                  iwl_get_cmd_string(trans, cmd_id));
1860                 }
1861                 clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1862                 IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
1863                                iwl_get_cmd_string(trans, cmd_id));
1864                 wake_up(&trans_pcie->wait_command_queue);
1865         }
1866
1867         meta->flags = 0;
1868
1869         spin_unlock_bh(&txq->lock);
1870 }
1871
1872 #define HOST_COMPLETE_TIMEOUT   (2 * HZ)
1873
1874 static int iwl_pcie_send_hcmd_async(struct iwl_trans *trans,
1875                                     struct iwl_host_cmd *cmd)
1876 {
1877         int ret;
1878
1879         /* An asynchronous command cannot expect an SKB to be set. */
1880         if (WARN_ON(cmd->flags & CMD_WANT_SKB))
1881                 return -EINVAL;
1882
1883         ret = iwl_pcie_enqueue_hcmd(trans, cmd);
1884         if (ret < 0) {
1885                 IWL_ERR(trans,
1886                         "Error sending %s: enqueue_hcmd failed: %d\n",
1887                         iwl_get_cmd_string(trans, cmd->id), ret);
1888                 return ret;
1889         }
1890         return 0;
1891 }
1892
1893 static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans,
1894                                    struct iwl_host_cmd *cmd)
1895 {
1896         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1897         struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
1898         int cmd_idx;
1899         int ret;
1900
1901         IWL_DEBUG_INFO(trans, "Attempting to send sync command %s\n",
1902                        iwl_get_cmd_string(trans, cmd->id));
1903
1904         if (WARN(test_and_set_bit(STATUS_SYNC_HCMD_ACTIVE,
1905                                   &trans->status),
1906                  "Command %s: a command is already active!\n",
1907                  iwl_get_cmd_string(trans, cmd->id)))
1908                 return -EIO;
1909
1910         IWL_DEBUG_INFO(trans, "Setting HCMD_ACTIVE for command %s\n",
1911                        iwl_get_cmd_string(trans, cmd->id));
1912
1913         cmd_idx = iwl_pcie_enqueue_hcmd(trans, cmd);
1914         if (cmd_idx < 0) {
1915                 ret = cmd_idx;
1916                 clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1917                 IWL_ERR(trans,
1918                         "Error sending %s: enqueue_hcmd failed: %d\n",
1919                         iwl_get_cmd_string(trans, cmd->id), ret);
1920                 return ret;
1921         }
1922
1923         ret = wait_event_timeout(trans_pcie->wait_command_queue,
1924                                  !test_bit(STATUS_SYNC_HCMD_ACTIVE,
1925                                            &trans->status),
1926                                  HOST_COMPLETE_TIMEOUT);
1927         if (!ret) {
1928                 IWL_ERR(trans, "Error sending %s: time out after %dms.\n",
1929                         iwl_get_cmd_string(trans, cmd->id),
1930                         jiffies_to_msecs(HOST_COMPLETE_TIMEOUT));
1931
1932                 IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n",
1933                         txq->read_ptr, txq->write_ptr);
1934
1935                 clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1936                 IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
1937                                iwl_get_cmd_string(trans, cmd->id));
1938                 ret = -ETIMEDOUT;
1939
1940                 iwl_trans_pcie_sync_nmi(trans);
1941                 goto cancel;
1942         }
1943
1944         if (test_bit(STATUS_FW_ERROR, &trans->status)) {
1945                 iwl_trans_pcie_dump_regs(trans);
1946                 IWL_ERR(trans, "FW error in SYNC CMD %s\n",
1947                         iwl_get_cmd_string(trans, cmd->id));
1948                 dump_stack();
1949                 ret = -EIO;
1950                 goto cancel;
1951         }
1952
1953         if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
1954             test_bit(STATUS_RFKILL_OPMODE, &trans->status)) {
1955                 IWL_DEBUG_RF_KILL(trans, "RFKILL in SYNC CMD... no rsp\n");
1956                 ret = -ERFKILL;
1957                 goto cancel;
1958         }
1959
1960         if ((cmd->flags & CMD_WANT_SKB) && !cmd->resp_pkt) {
1961                 IWL_ERR(trans, "Error: Response NULL in '%s'\n",
1962                         iwl_get_cmd_string(trans, cmd->id));
1963                 ret = -EIO;
1964                 goto cancel;
1965         }
1966
1967         return 0;
1968
1969 cancel:
1970         if (cmd->flags & CMD_WANT_SKB) {
1971                 /*
1972                  * Cancel the CMD_WANT_SKB flag for the cmd in the
1973                  * TX cmd queue. Otherwise in case the cmd comes
1974                  * in later, it will possibly set an invalid
1975                  * address (cmd->meta.source).
1976                  */
1977                 txq->entries[cmd_idx].meta.flags &= ~CMD_WANT_SKB;
1978         }
1979
1980         if (cmd->resp_pkt) {
1981                 iwl_free_resp(cmd);
1982                 cmd->resp_pkt = NULL;
1983         }
1984
1985         return ret;
1986 }
1987
1988 int iwl_trans_pcie_send_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
1989 {
1990         /* Make sure the NIC is still alive in the bus */
1991         if (test_bit(STATUS_TRANS_DEAD, &trans->status))
1992                 return -ENODEV;
1993
1994         if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
1995             test_bit(STATUS_RFKILL_OPMODE, &trans->status)) {
1996                 IWL_DEBUG_RF_KILL(trans, "Dropping CMD 0x%x: RF KILL\n",
1997                                   cmd->id);
1998                 return -ERFKILL;
1999         }
2000
2001         if (cmd->flags & CMD_ASYNC)
2002                 return iwl_pcie_send_hcmd_async(trans, cmd);
2003
2004         /* We still can fail on RFKILL that can be asserted while we wait */
2005         return iwl_pcie_send_hcmd_sync(trans, cmd);
2006 }
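
/*
 * Illustrative sketch of the host-command structure consumed here, using a
 * hypothetical opcode and payload; op_modes normally submit commands through
 * the iwl_trans_send_cmd() wrapper rather than calling this directly:
 *
 *	struct iwl_host_cmd hcmd = {
 *		.id = 0x77,				// hypothetical opcode
 *		.flags = CMD_WANT_SKB,			// sync, keep the response
 *		.data = { payload, },
 *		.len = { sizeof(*payload), },
 *		.dataflags = { IWL_HCMD_DFL_NOCOPY, },	// map payload directly
 *	};
 *	int ret = iwl_trans_pcie_send_hcmd(trans, &hcmd);
 *
 *	if (!ret && hcmd.resp_pkt)
 *		iwl_free_resp(&hcmd);
 */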
2007
2008 static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
2009                              struct iwl_txq *txq, u8 hdr_len,
2010                              struct iwl_cmd_meta *out_meta)
2011 {
2012         u16 head_tb_len;
2013         int i;
2014
2015         /*
2016          * Set up TFD's third entry to point directly to remainder
2017          * of skb's head, if any
2018          */
2019         head_tb_len = skb_headlen(skb) - hdr_len;
2020
2021         if (head_tb_len > 0) {
2022                 dma_addr_t tb_phys = dma_map_single(trans->dev,
2023                                                     skb->data + hdr_len,
2024                                                     head_tb_len, DMA_TO_DEVICE);
2025                 if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
2026                         return -EINVAL;
2027                 trace_iwlwifi_dev_tx_tb(trans->dev, skb, skb->data + hdr_len,
2028                                         tb_phys, head_tb_len);
2029                 iwl_pcie_txq_build_tfd(trans, txq, tb_phys, head_tb_len, false);
2030         }
2031
2032         /* set up the remaining entries to point to the data */
2033         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2034                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2035                 dma_addr_t tb_phys;
2036                 int tb_idx;
2037
2038                 if (!skb_frag_size(frag))
2039                         continue;
2040
2041                 tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
2042                                            skb_frag_size(frag), DMA_TO_DEVICE);
2043
2044                 if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
2045                         return -EINVAL;
2046                 trace_iwlwifi_dev_tx_tb(trans->dev, skb, skb_frag_address(frag),
2047                                         tb_phys, skb_frag_size(frag));
2048                 tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
2049                                                 skb_frag_size(frag), false);
2050                 if (tb_idx < 0)
2051                         return tb_idx;
2052
2053                 out_meta->tbs |= BIT(tb_idx);
2054         }
2055
2056         return 0;
2057 }
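
/*
 * Only the fragment TBs are recorded in out_meta->tbs above; that bitmap
 * appears to let the unmap path release page-fragment mappings with
 * dma_unmap_page() while everything else is released with
 * dma_unmap_single().
 */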
2058
2059 #ifdef CONFIG_INET
2060 struct iwl_tso_hdr_page *get_page_hdr(struct iwl_trans *trans, size_t len,
2061                                       struct sk_buff *skb)
2062 {
2063         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2064         struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->tso_hdr_page);
2065         struct page **page_ptr;
2066
2067         page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
2068
2069         if (WARN_ON(*page_ptr))
2070                 return NULL;
2071
2072         if (!p->page)
2073                 goto alloc;
2074
2075         /*
2076          * Check if there's enough room on this page
2077          *
2078          * Note that we put a page chaining pointer *last* in the
2079          * page - we need it somewhere, and if it's there then we
2080          * avoid DMA mapping the last bits of the page which may
2081          * trigger the 32-bit boundary hardware bug.
2082          *
2083          * (see also get_workaround_page() in tx-gen2.c)
2084          */
2085         if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE -
2086                            sizeof(void *))
2087                 goto out;
2088
2089         /* We don't have enough room on this page, get a new one. */
2090         __free_page(p->page);
2091
2092 alloc:
2093         p->page = alloc_page(GFP_ATOMIC);
2094         if (!p->page)
2095                 return NULL;
2096         p->pos = page_address(p->page);
2097         /* set the chaining pointer to NULL */
2098         *(void **)(page_address(p->page) + PAGE_SIZE - sizeof(void *)) = NULL;
2099 out:
2100         *page_ptr = p->page;
2101         get_page(p->page);
2102         return p;
2103 }
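
/*
 * The page reference taken above is remembered in skb->cb (via page_ptr)
 * so it can, presumably, be dropped again once the skb itself is released;
 * the per-CPU page is only replaced when it can no longer satisfy a
 * request, and the chaining pointer reserved at its end keeps the last
 * sizeof(void *) bytes out of any DMA mapping.
 */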
2104
2105 static void iwl_compute_pseudo_hdr_csum(void *iph, struct tcphdr *tcph,
2106                                         bool ipv6, unsigned int len)
2107 {
2108         if (ipv6) {
2109                 struct ipv6hdr *iphv6 = iph;
2110
2111                 tcph->check = ~csum_ipv6_magic(&iphv6->saddr, &iphv6->daddr,
2112                                                len + tcph->doff * 4,
2113                                                IPPROTO_TCP, 0);
2114         } else {
2115                 struct iphdr *iphv4 = iph;
2116
2117                 ip_send_check(iphv4);
2118                 tcph->check = ~csum_tcpudp_magic(iphv4->saddr, iphv4->daddr,
2119                                                  len + tcph->doff * 4,
2120                                                  IPPROTO_TCP, 0);
2121         }
2122 }
2123
2124 static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
2125                                    struct iwl_txq *txq, u8 hdr_len,
2126                                    struct iwl_cmd_meta *out_meta,
2127                                    struct iwl_device_tx_cmd *dev_cmd,
2128                                    u16 tb1_len)
2129 {
2130         struct iwl_tx_cmd *tx_cmd = (void *)dev_cmd->payload;
2131         struct iwl_trans_pcie *trans_pcie =
2132                 IWL_TRANS_GET_PCIE_TRANS(txq->trans);
2133         struct ieee80211_hdr *hdr = (void *)skb->data;
2134         unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
2135         unsigned int mss = skb_shinfo(skb)->gso_size;
2136         u16 length, iv_len, amsdu_pad;
2137         u8 *start_hdr;
2138         struct iwl_tso_hdr_page *hdr_page;
2139         struct tso_t tso;
2140
2141         /* if the packet is protected, then it must be CCMP or GCMP */
2142         BUILD_BUG_ON(IEEE80211_CCMP_HDR_LEN != IEEE80211_GCMP_HDR_LEN);
2143         iv_len = ieee80211_has_protected(hdr->frame_control) ?
2144                 IEEE80211_CCMP_HDR_LEN : 0;
2145
2146         trace_iwlwifi_dev_tx(trans->dev, skb,
2147                              iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
2148                              trans_pcie->tfd_size,
2149                              &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
2150
2151         ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
2152         snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
2153         total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len;
2154         amsdu_pad = 0;
2155
2156         /* total amount of header we may need for this A-MSDU */
2157         hdr_room = DIV_ROUND_UP(total_len, mss) *
2158                 (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len;
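
        /*
         * Illustrative sizing with hypothetical numbers: an 8k TSO payload
         * with an mss of 1500 bytes yields 6 subframes, each of which may
         * need up to 3 bytes of padding plus its own ethernet + SNAP + IP +
         * TCP headers in this page, while the IV (if present) is copied
         * only once in front of them.
         */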
2159
2160         /* Our device supports 9 segments at most, so it will fit in 1 page */
2161         hdr_page = get_page_hdr(trans, hdr_room, skb);
2162         if (!hdr_page)
2163                 return -ENOMEM;
2164
2165         start_hdr = hdr_page->pos;
2166         memcpy(hdr_page->pos, skb->data + hdr_len, iv_len);
2167         hdr_page->pos += iv_len;
2168
2169         /*
2170          * Pull the ieee80211 header + IV to be able to use TSO core,
2171          * we will restore it for the tx_status flow.
2172          */
2173         skb_pull(skb, hdr_len + iv_len);
2174
2175         /*
2176          * Remove the length of all the headers that we don't actually
2177          * have in the MPDU by themselves, but that we duplicate into
2178          * all the different MSDUs inside the A-MSDU.
2179          */
2180         le16_add_cpu(&tx_cmd->len, -snap_ip_tcp_hdrlen);
2181
2182         tso_start(skb, &tso);
2183
2184         while (total_len) {
2185                 /* this is the data left for this subframe */
2186                 unsigned int data_left =
2187                         min_t(unsigned int, mss, total_len);
2188                 struct sk_buff *csum_skb = NULL;
2189                 unsigned int hdr_tb_len;
2190                 dma_addr_t hdr_tb_phys;
2191                 struct tcphdr *tcph;
2192                 u8 *iph, *subf_hdrs_start = hdr_page->pos;
2193
2194                 total_len -= data_left;
2195
2196                 memset(hdr_page->pos, 0, amsdu_pad);
2197                 hdr_page->pos += amsdu_pad;
2198                 amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
2199                                   data_left)) & 0x3;
2200                 ether_addr_copy(hdr_page->pos, ieee80211_get_DA(hdr));
2201                 hdr_page->pos += ETH_ALEN;
2202                 ether_addr_copy(hdr_page->pos, ieee80211_get_SA(hdr));
2203                 hdr_page->pos += ETH_ALEN;
2204
2205                 length = snap_ip_tcp_hdrlen + data_left;
2206                 *((__be16 *)hdr_page->pos) = cpu_to_be16(length);
2207                 hdr_page->pos += sizeof(length);
2208
2209                 /*
2210                  * This will copy the SNAP as well which will be considered
2211                  * as MAC header.
2212                  */
2213                 tso_build_hdr(skb, hdr_page->pos, &tso, data_left, !total_len);
2214                 iph = hdr_page->pos + 8;
2215                 tcph = (void *)(iph + ip_hdrlen);
2216
2217                 /* For testing on current hardware only */
2218                 if (trans_pcie->sw_csum_tx) {
2219                         csum_skb = alloc_skb(data_left + tcp_hdrlen(skb),
2220                                              GFP_ATOMIC);
2221                         if (!csum_skb)
2222                                 return -ENOMEM;
2223
2224                         iwl_compute_pseudo_hdr_csum(iph, tcph,
2225                                                     skb->protocol ==
2226                                                         htons(ETH_P_IPV6),
2227                                                     data_left);
2228
2229                         skb_put_data(csum_skb, tcph, tcp_hdrlen(skb));
2230                         skb_reset_transport_header(csum_skb);
2231                         csum_skb->csum_start =
2232                                 (unsigned char *)tcp_hdr(csum_skb) -
2233                                                  csum_skb->head;
2234                 }
2235
2236                 hdr_page->pos += snap_ip_tcp_hdrlen;
2237
2238                 hdr_tb_len = hdr_page->pos - start_hdr;
2239                 hdr_tb_phys = dma_map_single(trans->dev, start_hdr,
2240                                              hdr_tb_len, DMA_TO_DEVICE);
2241                 if (unlikely(dma_mapping_error(trans->dev, hdr_tb_phys))) {
2242                         dev_kfree_skb(csum_skb);
2243                         return -EINVAL;
2244                 }
2245                 iwl_pcie_txq_build_tfd(trans, txq, hdr_tb_phys,
2246                                        hdr_tb_len, false);
2247                 trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
2248                                         hdr_tb_phys, hdr_tb_len);
2249                 /* add this subframe's headers' length to the tx_cmd */
2250                 le16_add_cpu(&tx_cmd->len, hdr_page->pos - subf_hdrs_start);
2251
2252                 /* prepare the start_hdr for the next subframe */
2253                 start_hdr = hdr_page->pos;
2254
2255                 /* put the payload */
2256                 while (data_left) {
2257                         unsigned int size = min_t(unsigned int, tso.size,
2258                                                   data_left);
2259                         dma_addr_t tb_phys;
2260
2261                         if (trans_pcie->sw_csum_tx)
2262                                 skb_put_data(csum_skb, tso.data, size);
2263
2264                         tb_phys = dma_map_single(trans->dev, tso.data,
2265                                                  size, DMA_TO_DEVICE);
2266                         if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
2267                                 dev_kfree_skb(csum_skb);
2268                                 return -EINVAL;
2269                         }
2270
2271                         iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
2272                                                size, false);
2273                         trace_iwlwifi_dev_tx_tb(trans->dev, skb, tso.data,
2274                                                 tb_phys, size);
2275
2276                         data_left -= size;
2277                         tso_build_data(skb, &tso, size);
2278                 }
2279
2280                 /* For testing on early hardware only */
2281                 if (trans_pcie->sw_csum_tx) {
2282                         __wsum csum;
2283
2284                         csum = skb_checksum(csum_skb,
2285                                             skb_checksum_start_offset(csum_skb),
2286                                             csum_skb->len -
2287                                             skb_checksum_start_offset(csum_skb),
2288                                             0);
2289                         dev_kfree_skb(csum_skb);
2290                         dma_sync_single_for_cpu(trans->dev, hdr_tb_phys,
2291                                                 hdr_tb_len, DMA_TO_DEVICE);
2292                         tcph->check = csum_fold(csum);
2293                         dma_sync_single_for_device(trans->dev, hdr_tb_phys,
2294                                                    hdr_tb_len, DMA_TO_DEVICE);
2295                 }
2296         }
2297
2298         /* re-add the WiFi header and IV */
2299         skb_push(skb, hdr_len + iv_len);
2300
2301         return 0;
2302 }
2303 #else /* CONFIG_INET */
2304 static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
2305                                    struct iwl_txq *txq, u8 hdr_len,
2306                                    struct iwl_cmd_meta *out_meta,
2307                                    struct iwl_device_tx_cmd *dev_cmd,
2308                                    u16 tb1_len)
2309 {
2310         /* No A-MSDU without CONFIG_INET */
2311         WARN_ON(1);
2312
2313         return -1;
2314 }
2315 #endif /* CONFIG_INET */
2316
2317 int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
2318                       struct iwl_device_tx_cmd *dev_cmd, int txq_id)
2319 {
2320         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2321         struct ieee80211_hdr *hdr;
2322         struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload;
2323         struct iwl_cmd_meta *out_meta;
2324         struct iwl_txq *txq;
2325         dma_addr_t tb0_phys, tb1_phys, scratch_phys;
2326         void *tb1_addr;
2327         void *tfd;
2328         u16 len, tb1_len;
2329         bool wait_write_ptr;
2330         __le16 fc;
2331         u8 hdr_len;
2332         u16 wifi_seq;
2333         bool amsdu;
2334
2335         txq = trans_pcie->txq[txq_id];
2336
2337         if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used),
2338                       "TX on unused queue %d\n", txq_id))
2339                 return -EINVAL;
2340
2341         if (unlikely(trans_pcie->sw_csum_tx &&
2342                      skb->ip_summed == CHECKSUM_PARTIAL)) {
2343                 int offs = skb_checksum_start_offset(skb);
2344                 int csum_offs = offs + skb->csum_offset;
2345                 __wsum csum;
2346
2347                 if (skb_ensure_writable(skb, csum_offs + sizeof(__sum16)))
2348                         return -1;
2349
2350                 csum = skb_checksum(skb, offs, skb->len - offs, 0);
2351                 *(__sum16 *)(skb->data + csum_offs) = csum_fold(csum);
2352
2353                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2354         }
2355
2356         if (skb_is_nonlinear(skb) &&
2357             skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) &&
2358             __skb_linearize(skb))
2359                 return -ENOMEM;
2360
2361         /* mac80211 always puts the full header into the SKB's head,
2362          * so there's no need to check if it's readable there
2363          */
2364         hdr = (struct ieee80211_hdr *)skb->data;
2365         fc = hdr->frame_control;
2366         hdr_len = ieee80211_hdrlen(fc);
2367
2368         spin_lock(&txq->lock);
2369
2370         if (iwl_queue_space(trans, txq) < txq->high_mark) {
2371                 iwl_stop_queue(trans, txq);
2372
2373                 /* don't put the packet on the ring, if there is no room */
2374                 if (unlikely(iwl_queue_space(trans, txq) < 3)) {
2375                         struct iwl_device_tx_cmd **dev_cmd_ptr;
2376
2377                         dev_cmd_ptr = (void *)((u8 *)skb->cb +
2378                                                trans_pcie->dev_cmd_offs);
2379
2380                         *dev_cmd_ptr = dev_cmd;
2381                         __skb_queue_tail(&txq->overflow_q, skb);
2382
2383                         spin_unlock(&txq->lock);
2384                         return 0;
2385                 }
2386         }
2387
2388         /* In AGG mode, the index in the ring must correspond to the WiFi
2389          * sequence number. This is a HW requirement to help the SCD to parse
2390          * the BA.
2391          * Check here that the packets are in the right place on the ring.
2392          */
2393         wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
2394         WARN_ONCE(txq->ampdu &&
2395                   (wifi_seq & 0xff) != txq->write_ptr,
2396                   "Q: %d WiFi Seq %d tfdNum %d",
2397                   txq_id, wifi_seq, txq->write_ptr);
2398
2399         /* Set up driver data for this TFD */
2400         txq->entries[txq->write_ptr].skb = skb;
2401         txq->entries[txq->write_ptr].cmd = dev_cmd;
2402
2403         dev_cmd->hdr.sequence =
2404                 cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
2405                             INDEX_TO_SEQ(txq->write_ptr)));
2406
2407         tb0_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr);
2408         scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
2409                        offsetof(struct iwl_tx_cmd, scratch);
2410
2411         tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
2412         tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
2413
2414         /* Set up first empty entry in queue's array of Tx/cmd buffers */
2415         out_meta = &txq->entries[txq->write_ptr].meta;
2416         out_meta->flags = 0;
2417
2418         /*
2419          * The second TB (tb1) points to the remainder of the TX command
2420          * and the 802.11 header - dword aligned size
2421          * (This calculation modifies the TX command, so do it before the
2422          * setup of the first TB)
2423          */
2424         len = sizeof(struct iwl_tx_cmd) + sizeof(struct iwl_cmd_header) +
2425               hdr_len - IWL_FIRST_TB_SIZE;
2426         /* do not align A-MSDU to dword as the subframe header aligns it */
2427         amsdu = ieee80211_is_data_qos(fc) &&
2428                 (*ieee80211_get_qos_ctl(hdr) &
2429                  IEEE80211_QOS_CTL_A_MSDU_PRESENT);
2430         if (trans_pcie->sw_csum_tx || !amsdu) {
2431                 tb1_len = ALIGN(len, 4);
2432                 /* Tell NIC about any 2-byte padding after MAC header */
2433                 if (tb1_len != len)
2434                         tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_MH_PAD);
2435         } else {
2436                 tb1_len = len;
2437         }
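
        /*
         * Resulting TB layout for a regular frame, as built below: TB0 is
         * the small bi-directional scratch buffer holding the start of the
         * TX command, TB1 maps the rest of the TX command plus the 802.11
         * header (tb1_len as computed above), and the remaining TBs map the
         * frame payload from the skb head and its fragments.
         */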
2438
2439         /*
2440          * The first TB points to bi-directional DMA data; we'll
2441          * memcpy the data into it later.
2442          */
2443         iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
2444                                IWL_FIRST_TB_SIZE, true);
2445
2446         /* there must be data left over for TB1 or this code must be changed */
2447         BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_FIRST_TB_SIZE);
2448
2449         /* map the data for TB1 */
2450         tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
2451         tb1_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
2452         if (unlikely(dma_mapping_error(trans->dev, tb1_phys)))
2453                 goto out_err;
2454         iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, false);
2455
2456         trace_iwlwifi_dev_tx(trans->dev, skb,
2457                              iwl_pcie_get_tfd(trans, txq,
2458                                               txq->write_ptr),
2459                              trans_pcie->tfd_size,
2460                              &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
2461                              hdr_len);
2462
2463         /*
2464          * If gso_size wasn't set, don't give the frame "amsdu treatment"
2465          * (adding subframes, etc.).
2466          * This can happen in some testing flows when the amsdu was already
2467          * pre-built, and we just need to send the resulting skb.
2468          */
2469         if (amsdu && skb_shinfo(skb)->gso_size) {
2470                 if (unlikely(iwl_fill_data_tbs_amsdu(trans, skb, txq, hdr_len,
2471                                                      out_meta, dev_cmd,
2472                                                      tb1_len)))
2473                         goto out_err;
2474         } else {
2475                 struct sk_buff *frag;
2476
2477                 if (unlikely(iwl_fill_data_tbs(trans, skb, txq, hdr_len,
2478                                                out_meta)))
2479                         goto out_err;
2480
2481                 skb_walk_frags(skb, frag) {
2482                         if (unlikely(iwl_fill_data_tbs(trans, frag, txq, 0,
2483                                                        out_meta)))
2484                                 goto out_err;
2485                 }
2486         }
2487
2488         /* building the A-MSDU might have changed this data, so memcpy it now */
2489         memcpy(&txq->first_tb_bufs[txq->write_ptr], dev_cmd, IWL_FIRST_TB_SIZE);
2490
2491         tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
2492         /* Set up entry for this TFD in Tx byte-count array */
2493         iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
2494                                          iwl_pcie_tfd_get_num_tbs(trans, tfd));
2495
2496         wait_write_ptr = ieee80211_has_morefrags(fc);
2497
2498         /* start timer if queue currently empty */
2499         if (txq->read_ptr == txq->write_ptr && txq->wd_timeout) {
2500                 /*
2501                  * If the TXQ is active, then set the timer; if not,
2502                  * save the timeout in the remainder so that the timer
2503                  * will be armed with the right value when the station
2504                  * wakes up.
2505                  */
2506                 if (!txq->frozen)
2507                         mod_timer(&txq->stuck_timer,
2508                                   jiffies + txq->wd_timeout);
2509                 else
2510                         txq->frozen_expiry_remainder = txq->wd_timeout;
2511         }
2512
2513         /* Tell device the write index *just past* this latest filled TFD */
2514         txq->write_ptr = iwl_queue_inc_wrap(trans, txq->write_ptr);
2515         if (!wait_write_ptr)
2516                 iwl_pcie_txq_inc_wr_ptr(trans, txq);
2517
2518         /*
2519          * At this point the frame is "transmitted" successfully
2520          * and we will get a TX status notification eventually.
2521          */
2522         spin_unlock(&txq->lock);
2523         return 0;
2524 out_err:
2525         iwl_pcie_tfd_unmap(trans, out_meta, txq, txq->write_ptr);
2526         spin_unlock(&txq->lock);
2527         return -1;
2528 }