Merge tag 'for-linus-20180623' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Jun 2018 22:33:54 +0000 (06:33 +0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Jun 2018 22:33:54 +0000 (06:33 +0800)
Pull block fixes from Jens Axboe:

 - Further timeout fixes. We aren't quite there yet, so expect another
   round of fixes for that to completely close some of the IRQ vs
   completion races. (Christoph/Bart)

 - Set of NVMe fixes from the usual suspects, mostly error handling

 - Two off-by-one fixes (Dan)

 - Another bdi race fix (Jan)

 - Fix nbd reconfigure with NBD_DISCONNECT_ON_CLOSE (Doron)

* tag 'for-linus-20180623' of git://git.kernel.dk/linux-block:
  blk-mq: Fix timeout handling in case the timeout handler returns BLK_EH_DONE
  bdi: Fix another oops in wb_workfn()
  lightnvm: Remove depends on HAS_DMA in case of platform dependency
  nvme-pci: limit max IO size and segments to avoid high order allocations
  nvme-pci: move nvme_kill_queues to nvme_remove_dead_ctrl
  nvme-fc: release io queues to allow fast fail
  nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag.
  block: sed-opal: Fix a couple off by one bugs
  blk-mq-debugfs: Off by one in blk_mq_rq_state_name()
  nvmet: reset keep alive timer in controller enable
  nvme-rdma: don't override opts->queue_size
  nvme-rdma: Fix command completion race at error recovery
  nvme-rdma: fix possible free of a non-allocated async event buffer
  nvme-rdma: fix possible double free condition when failing to create a controller
  Revert "block: Add warning for bi_next not NULL in bio_endio()"
  block: fix timeout changes for legacy request drivers

20 files changed:
block/bio.c
block/blk-core.c
block/blk-mq-debugfs.c
block/blk-mq.c
block/blk-softirq.c
block/blk-timeout.c
block/sed-opal.c
drivers/block/nbd.c
drivers/block/null_blk.c
drivers/lightnvm/Kconfig
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/core.c
drivers/scsi/scsi_transport_fc.c
include/linux/backing-dev-defs.h
include/uapi/linux/nbd.h
mm/backing-dev.c

diff --git a/block/bio.c b/block/bio.c
index 9710e275f23079b8b7548ee935ab653036e82c5d..67eff5eddc49190fe8be721edd7031e8b6284a1e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1807,9 +1807,6 @@ again:
        if (!bio_integrity_endio(bio))
                return;
 
-       if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
-               bio->bi_next = NULL;
-
        /*
         * Need to have a real endio function for chained bios, otherwise
         * various corner cases will break (like stacking block devices that
diff --git a/block/blk-core.c b/block/blk-core.c
index cf0ee764b908b384f69be9efbb9d7a1352eb7a52..afd2596ea3d3601960ef665b48a8a75e100f3741 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -273,10 +273,6 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
        bio_advance(bio, nbytes);
 
        /* don't actually finish bio if it's part of flush sequence */
-       /*
-        * XXX this code looks suspicious - it's not consistent with advancing
-        * req->bio in caller
-        */
        if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
                bio_endio(bio);
 }
@@ -3081,10 +3077,8 @@ bool blk_update_request(struct request *req, blk_status_t error,
                struct bio *bio = req->bio;
                unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-               if (bio_bytes == bio->bi_iter.bi_size) {
+               if (bio_bytes == bio->bi_iter.bi_size)
                        req->bio = bio->bi_next;
-                       bio->bi_next = NULL;
-               }
 
                /* Completion has already been traced */
                bio_clear_flag(bio, BIO_TRACE_COMPLETION);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index ffa622366922fed04e9dbd2606ffa294b25a697b..1c4532e9293800662d92b809d78637e06607fd90 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -356,7 +356,7 @@ static const char *const blk_mq_rq_state_name_array[] = {
 
 static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
 {
-       if (WARN_ON_ONCE((unsigned int)rq_state >
+       if (WARN_ON_ONCE((unsigned int)rq_state >=
                         ARRAY_SIZE(blk_mq_rq_state_name_array)))
                return "(?)";
        return blk_mq_rq_state_name_array[rq_state];
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 70c65bb6c0131c84130fae44808acb51cf427ace..b429d515b5689eddb29c19886fc221d0e0c463f9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -781,7 +781,6 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
                WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
        }
 
-       req->rq_flags &= ~RQF_TIMED_OUT;
        blk_add_timer(req);
 }
 
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 01e2b353a2b9aadc2b5d20fe227739d1d0ae296b..15c1f5e12eb89460bc42eb7f5807eaa03254e51d 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -144,6 +144,7 @@ do_local:
 
        local_irq_restore(flags);
 }
+EXPORT_SYMBOL(__blk_complete_request);
 
 /**
  * blk_complete_request - end I/O on a request
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 4b8a48d48ba13394cf0ae7a7dc0016696ae5efd6..f2cfd56e1606ed9d8e1da979a1e1e6cdcb506a38 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -210,6 +210,7 @@ void blk_add_timer(struct request *req)
        if (!req->timeout)
                req->timeout = q->rq_timeout;
 
+       req->rq_flags &= ~RQF_TIMED_OUT;
        blk_rq_set_deadline(req, jiffies + req->timeout);
 
        /*
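
Taken together with the null_blk and scsi_transport_fc hunks further down, the blk-mq.c, blk-softirq.c, and blk-timeout.c hunks above make up "block: fix timeout changes for legacy request drivers": clearing RQF_TIMED_OUT moves from blk_mq_rq_timed_out() into blk_add_timer(), which the mq and legacy paths share, and __blk_complete_request() is exported because blk_mq_complete_request() may only be called on blk-mq requests. A minimal sketch of a timeout handler under these rules; mydrv_try_abort() is a hypothetical helper, not part of this series:

static enum blk_eh_timer_return mydrv_timed_out(struct request *rq)
{
        if (mydrv_try_abort(rq)) {
                /* The driver owns the request again and must complete it:
                 * legacy drivers use __blk_complete_request() (exported
                 * above), blk-mq drivers use blk_mq_complete_request(). */
                __blk_complete_request(rq);
                return BLK_EH_DONE;
        }

        /* Re-arm; blk_add_timer() now clears RQF_TIMED_OUT itself, so the
         * request can time out again on both the mq and legacy paths. */
        return BLK_EH_RESET_TIMER;
}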
diff --git a/block/sed-opal.c b/block/sed-opal.c
index 945f4b8610e0c7d85242b500141d4bc1c0f671a2..e0de4dd448b3c7238e8656b572de72206302bf87 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -877,7 +877,7 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
                return 0;
        }
 
-       if (n > resp->num) {
+       if (n >= resp->num) {
                pr_debug("Response has %d tokens. Can't access %d\n",
                         resp->num, n);
                return 0;
@@ -916,7 +916,7 @@ static u64 response_get_u64(const struct parsed_resp *resp, int n)
                return 0;
        }
 
-       if (n > resp->num) {
+       if (n >= resp->num) {
                pr_debug("Response has %d tokens. Can't access %d\n",
                         resp->num, n);
                return 0;
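
Both sed-opal hunks, like the blk_mq_rq_state_name() hunk above, fix the same class of bug: an index into an array holding num valid entries must be rejected when it equals num, so the guard has to be >=, not >. A minimal sketch of the corrected pattern, with hypothetical names:

struct parsed_resp_example {
        int num;        /* valid entries in toks[] */
        int toks[64];
};

static int get_token(const struct parsed_resp_example *resp, int n)
{
        /* toks[] is valid at indices 0..num-1, so n == num is already out
         * of range; the old "n > num" test let it through and read one
         * element past the end. */
        if (n >= resp->num)
                return -EINVAL;
        return resp->toks[n];
}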
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 3b7083b8ecbb3b0ffcad0d2879954780075333ee..74a05561b620a3be51bd30a09f1eeaa1b9168876 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -76,6 +76,7 @@ struct link_dead_args {
 #define NBD_HAS_CONFIG_REF             4
 #define NBD_BOUND                      5
 #define NBD_DESTROY_ON_DISCONNECT      6
+#define NBD_DISCONNECT_ON_CLOSE        7
 
 struct nbd_config {
        u32 flags;
@@ -138,6 +139,7 @@ static void nbd_config_put(struct nbd_device *nbd);
 static void nbd_connect_reply(struct genl_info *info, int index);
 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
 static void nbd_dead_link_work(struct work_struct *work);
+static void nbd_disconnect_and_put(struct nbd_device *nbd);
 
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
@@ -1305,6 +1307,12 @@ out:
 static void nbd_release(struct gendisk *disk, fmode_t mode)
 {
        struct nbd_device *nbd = disk->private_data;
+       struct block_device *bdev = bdget_disk(disk, 0);
+
+       if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+                       bdev->bd_openers == 0)
+               nbd_disconnect_and_put(nbd);
+
        nbd_config_put(nbd);
        nbd_put(nbd);
 }
@@ -1705,6 +1713,10 @@ again:
                                &config->runtime_flags);
                        put_dev = true;
                }
+               if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                               &config->runtime_flags);
+               }
        }
 
        if (info->attrs[NBD_ATTR_SOCKETS]) {
@@ -1749,6 +1761,17 @@ out:
        return ret;
 }
 
+static void nbd_disconnect_and_put(struct nbd_device *nbd)
+{
+       mutex_lock(&nbd->config_lock);
+       nbd_disconnect(nbd);
+       nbd_clear_sock(nbd);
+       mutex_unlock(&nbd->config_lock);
+       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+                              &nbd->config->runtime_flags))
+               nbd_config_put(nbd);
+}
+
 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
        struct nbd_device *nbd;
@@ -1781,13 +1804,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
                nbd_put(nbd);
                return 0;
        }
-       mutex_lock(&nbd->config_lock);
-       nbd_disconnect(nbd);
-       nbd_clear_sock(nbd);
-       mutex_unlock(&nbd->config_lock);
-       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
-                              &nbd->config->runtime_flags))
-               nbd_config_put(nbd);
+       nbd_disconnect_and_put(nbd);
        nbd_config_put(nbd);
        nbd_put(nbd);
        return 0;
@@ -1798,7 +1815,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
        struct nbd_device *nbd = NULL;
        struct nbd_config *config;
        int index;
-       int ret = -EINVAL;
+       int ret = 0;
        bool put_dev = false;
 
        if (!netlink_capable(skb, CAP_SYS_ADMIN))
@@ -1838,6 +1855,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
            !nbd->task_recv) {
                dev_err(nbd_to_dev(nbd),
                        "not configured, cannot reconfigure\n");
+               ret = -EINVAL;
                goto out;
        }
 
@@ -1862,6 +1880,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
                                               &config->runtime_flags))
                                refcount_inc(&nbd->refs);
                }
+
+               if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                                       &config->runtime_flags);
+               } else {
+                       clear_bit(NBD_DISCONNECT_ON_CLOSE,
+                                       &config->runtime_flags);
+               }
        }
 
        if (info->attrs[NBD_ATTR_SOCKETS]) {
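
The new flag is opted into from userspace over the nbd generic-netlink interface. A minimal sketch using libnl-genl-3 and the names from include/uapi/linux/nbd-netlink.h; error handling is elided and the helper name is hypothetical:

#include <linux/nbd.h>
#include <linux/nbd-netlink.h>
#include <netlink/genl/ctrl.h>
#include <netlink/genl/genl.h>

/* Ask an already-configured /dev/nbd<index> to disconnect itself once
 * its last opener closes it. */
static int nbd_set_disconnect_on_close(int index)
{
        struct nl_sock *sk = nl_socket_alloc();
        struct nl_msg *msg = nlmsg_alloc();
        int family;

        genl_connect(sk);
        family = genl_ctrl_resolve(sk, NBD_GENL_FAMILY_NAME);

        genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
                    NBD_CMD_RECONFIGURE, 0);
        nla_put_u32(msg, NBD_ATTR_INDEX, index);
        nla_put_u64(msg, NBD_ATTR_CLIENT_FLAGS,
                    NBD_CFLAG_DISCONNECT_ON_CLOSE);

        return nl_send_auto(sk, msg);   /* >0 on success */
}

A later NBD_CMD_RECONFIGURE that passes NBD_ATTR_CLIENT_FLAGS without the bit clears it again, per the clear_bit() branch above.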
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 7948049f6c4321b02e1611383dae1be86a7748f1..042c778e5a4e0bf2009c38a6b1cf37bc5d23ce89 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -1365,7 +1365,7 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
 static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
 {
        pr_info("null: rq %p timed out\n", rq);
-       blk_mq_complete_request(rq);
+       __blk_complete_request(rq);
        return BLK_EH_DONE;
 }
 
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 10c08982185a572ff05683461d514e86c8920f96..9c03f35d9df113c6eb6608f4b48b85447635aca9 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -4,7 +4,7 @@
 
 menuconfig NVM
        bool "Open-Channel SSD target support"
-       depends on BLOCK && HAS_DMA && PCI
+       depends on BLOCK && PCI
        select BLK_DEV_NVME
        help
          Say Y here to get to enable Open-channel SSDs.
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 21710a7460c823bbc4f84134d7ecce70d3f993ba..46df030b2c3f74f33621dc7d4512b17fe40fd079 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1808,6 +1808,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
                u32 max_segments =
                        (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
 
+               max_segments = min_not_zero(max_segments, ctrl->max_segments);
                blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
                blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
        }
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b528a2f5826cbfe19b22aadd7e09e1ceff512cb6..41d45a1b5c628c143caac1886bd52f08a360f15e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2790,6 +2790,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
        /* re-enable the admin_q so anything new can fast fail */
        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
+       /* resume the io queues so that things will fast fail */
+       nvme_start_queues(&ctrl->ctrl);
+
        nvme_fc_ctlr_inactive_on_rport(ctrl);
 }
 
@@ -2804,9 +2807,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
         * waiting for io to terminate
         */
        nvme_fc_delete_association(ctrl);
-
-       /* resume the io queues so that things will fast fail */
-       nvme_start_queues(nctrl);
 }
 
 static void
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 231807cbc849869afcbc16fce2e3389539ce2684..0c4a33df3b2f3bb9d8a710556dab3f8c55ed2302 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -170,6 +170,7 @@ struct nvme_ctrl {
        u64 cap;
        u32 page_size;
        u32 max_hw_sectors;
+       u32 max_segments;
        u16 oncs;
        u16 oacs;
        u16 nssa;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fc33804662e7bd35cfbacd93a26101bf23b3f43d..ba943f211687c638cab9708dbb5011b7e3d53b2f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
 
 #define SGES_PER_PAGE  (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
 
+/*
+ * These can be higher, but we need to ensure that any command doesn't
+ * require an sg allocation that needs more than a page of data.
+ */
+#define NVME_MAX_KB_SZ 4096
+#define NVME_MAX_SEGS  127
+
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
@@ -100,6 +107,8 @@ struct nvme_dev {
        struct nvme_ctrl ctrl;
        struct completion ioq_wait;
 
+       mempool_t *iod_mempool;
+
        /* shadow doorbell buffer support: */
        u32 *dbbuf_dbs;
        dma_addr_t dbbuf_dbs_dma_addr;
@@ -477,10 +486,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
        iod->use_sgl = nvme_pci_use_sgls(dev, rq);
 
        if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
-               size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
-                               iod->use_sgl);
-
-               iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
+               iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
                if (!iod->sg)
                        return BLK_STS_RESOURCE;
        } else {
@@ -526,7 +532,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
        }
 
        if (iod->sg != iod->inline_sg)
-               kfree(iod->sg);
+               mempool_free(iod->sg, dev->iod_mempool);
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
@@ -2280,6 +2286,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
                blk_put_queue(dev->ctrl.admin_q);
        kfree(dev->queues);
        free_opal_dev(dev->ctrl.opal_dev);
+       mempool_destroy(dev->iod_mempool);
        kfree(dev);
 }
 
@@ -2289,6 +2296,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
 
        nvme_get_ctrl(&dev->ctrl);
        nvme_dev_disable(dev, false);
+       nvme_kill_queues(&dev->ctrl);
        if (!queue_work(nvme_wq, &dev->remove_work))
                nvme_put_ctrl(&dev->ctrl);
 }
@@ -2333,6 +2341,13 @@ static void nvme_reset_work(struct work_struct *work)
        if (result)
                goto out;
 
+       /*
+        * Limit the max command size to prevent iod->sg allocations going
+        * over a single page.
+        */
+       dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+       dev->ctrl.max_segments = NVME_MAX_SEGS;
+
        result = nvme_init_identify(&dev->ctrl);
        if (result)
                goto out;
@@ -2405,7 +2420,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
        struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-       nvme_kill_queues(&dev->ctrl);
        if (pci_get_drvdata(pdev))
                device_release_driver(&pdev->dev);
        nvme_put_ctrl(&dev->ctrl);
@@ -2509,6 +2523,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        int node, result = -ENOMEM;
        struct nvme_dev *dev;
        unsigned long quirks = id->driver_data;
+       size_t alloc_size;
 
        node = dev_to_node(&pdev->dev);
        if (node == NUMA_NO_NODE)
@@ -2546,6 +2561,23 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (result)
                goto release_pools;
 
+       /*
+        * Double check that our mempool alloc size will cover the biggest
+        * command we support.
+        */
+       alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
+                                               NVME_MAX_SEGS, true);
+       WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+
+       dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
+                                               mempool_kfree,
+                                               (void *) alloc_size,
+                                               GFP_KERNEL, node);
+       if (!dev->iod_mempool) {
+               result = -ENOMEM;
+               goto release_pools;
+       }
+
        dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
        nvme_get_ctrl(&dev->ctrl);
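
The new caps in the pci.c change are tied together by a little arithmetic worth making explicit. NVME_MAX_KB_SZ << 1 converts kilobytes into 512-byte sectors, so max_hw_sectors caps a single I/O at 8192 sectors = 4 MB, and NVME_MAX_SEGS keeps the worst-case iod->sg allocation within one page. A back-of-the-envelope check, assuming 4K pages, 8-byte pointers, and a 32-byte struct scatterlist (exact sizes are config-dependent, so treat this as illustrative):

/*
 * Worst case per request under the new caps:
 *
 *   sg array:           127 segments * 32 bytes       = 4064 bytes
 *   list-page pointers: 4 MB of data needs only a few
 *                       PRP/SGL list pages, ~3 * 8    =  ~24 bytes
 *                                                total < 4096 bytes
 *
 * so one mempool element fits in a page -- exactly what the
 * WARN_ON_ONCE(alloc_size > PAGE_SIZE) in nvme_probe() double checks.
 * Keeping a single worst-case element in reserve means the GFP_ATOMIC
 * mempool_alloc() in nvme_init_iod() always has a fallback under memory
 * pressure, instead of failing a (possibly high-order) kmalloc().
 */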
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c9424da0d23e3cbbdd0e2b5209d9eddca9f1591f..9544625c0b7d687a6b1a9fa26fdfb80ed2d50068 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -560,12 +560,6 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
        if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
                return;
 
-       if (nvme_rdma_queue_idx(queue) == 0) {
-               nvme_rdma_free_qe(queue->device->dev,
-                       &queue->ctrl->async_event_sqe,
-                       sizeof(struct nvme_command), DMA_TO_DEVICE);
-       }
-
        nvme_rdma_destroy_queue_ib(queue);
        rdma_destroy_id(queue->cm_id);
 }
@@ -698,7 +692,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
                set = &ctrl->tag_set;
                memset(set, 0, sizeof(*set));
                set->ops = &nvme_rdma_mq_ops;
-               set->queue_depth = nctrl->opts->queue_size;
+               set->queue_depth = nctrl->sqsize + 1;
                set->reserved_tags = 1; /* fabric connect */
                set->numa_node = NUMA_NO_NODE;
                set->flags = BLK_MQ_F_SHOULD_MERGE;
@@ -734,11 +728,12 @@ out:
 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
-       nvme_rdma_stop_queue(&ctrl->queues[0]);
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.admin_q);
                nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
        }
+       nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+               sizeof(struct nvme_command), DMA_TO_DEVICE);
        nvme_rdma_free_queue(&ctrl->queues[0]);
 }
 
@@ -755,11 +750,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 
        ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
+       error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+                       sizeof(struct nvme_command), DMA_TO_DEVICE);
+       if (error)
+               goto out_free_queue;
+
        if (new) {
                ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
                if (IS_ERR(ctrl->ctrl.admin_tagset)) {
                        error = PTR_ERR(ctrl->ctrl.admin_tagset);
-                       goto out_free_queue;
+                       goto out_free_async_qe;
                }
 
                ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
@@ -795,12 +795,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
        if (error)
                goto out_stop_queue;
 
-       error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
-                       &ctrl->async_event_sqe, sizeof(struct nvme_command),
-                       DMA_TO_DEVICE);
-       if (error)
-               goto out_stop_queue;
-
        return 0;
 
 out_stop_queue:
@@ -811,6 +805,9 @@ out_cleanup_queue:
 out_free_tagset:
        if (new)
                nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+out_free_async_qe:
+       nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+               sizeof(struct nvme_command), DMA_TO_DEVICE);
 out_free_queue:
        nvme_rdma_free_queue(&ctrl->queues[0]);
        return error;
@@ -819,7 +816,6 @@ out_free_queue:
 static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
-       nvme_rdma_stop_io_queues(ctrl);
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.connect_q);
                nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
@@ -888,9 +884,9 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
        list_del(&ctrl->list);
        mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-       kfree(ctrl->queues);
        nvmf_free_options(nctrl->opts);
 free_ctrl:
+       kfree(ctrl->queues);
        kfree(ctrl);
 }
 
@@ -949,6 +945,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        return;
 
 destroy_admin:
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        nvme_rdma_destroy_admin_queue(ctrl, false);
 requeue:
        dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
@@ -965,12 +962,14 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
        if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
+               nvme_rdma_stop_io_queues(ctrl);
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
                nvme_rdma_destroy_io_queues(ctrl, false);
        }
 
        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
        nvme_rdma_destroy_admin_queue(ctrl, false);
@@ -1736,6 +1735,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
        if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
+               nvme_rdma_stop_io_queues(ctrl);
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
                nvme_rdma_destroy_io_queues(ctrl, shutdown);
@@ -1747,6 +1747,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
                nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 
        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@ -1932,11 +1933,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
                goto out_free_ctrl;
        }
 
-       ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
-                               0 /* no quirks, we're perfect! */);
-       if (ret)
-               goto out_free_ctrl;
-
        INIT_DELAYED_WORK(&ctrl->reconnect_work,
                        nvme_rdma_reconnect_ctrl_work);
        INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
@@ -1950,14 +1946,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
                                GFP_KERNEL);
        if (!ctrl->queues)
-               goto out_uninit_ctrl;
+               goto out_free_ctrl;
+
+       ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
+                               0 /* no quirks, we're perfect! */);
+       if (ret)
+               goto out_kfree_queues;
 
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
        WARN_ON_ONCE(!changed);
 
        ret = nvme_rdma_configure_admin_queue(ctrl, true);
        if (ret)
-               goto out_kfree_queues;
+               goto out_uninit_ctrl;
 
        /* sanity check icdoff */
        if (ctrl->ctrl.icdoff) {
@@ -1974,20 +1975,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
                goto out_remove_admin_queue;
        }
 
-       if (opts->queue_size > ctrl->ctrl.maxcmd) {
-               /* warn if maxcmd is lower than queue_size */
-               dev_warn(ctrl->ctrl.device,
-                       "queue_size %zu > ctrl maxcmd %u, clamping down\n",
-                       opts->queue_size, ctrl->ctrl.maxcmd);
-               opts->queue_size = ctrl->ctrl.maxcmd;
-       }
-
+       /* only warn if argument is too large here, will clamp later */
        if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
-               /* warn if sqsize is lower than queue_size */
                dev_warn(ctrl->ctrl.device,
                        "queue_size %zu > ctrl sqsize %u, clamping down\n",
                        opts->queue_size, ctrl->ctrl.sqsize + 1);
-               opts->queue_size = ctrl->ctrl.sqsize + 1;
+       }
+
+       /* warn if maxcmd is lower than sqsize+1 */
+       if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
+               dev_warn(ctrl->ctrl.device,
+                       "sqsize %u > ctrl maxcmd %u, clamping down\n",
+                       ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
+               ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
        }
 
        if (opts->nr_io_queues) {
@@ -2013,15 +2013,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        return &ctrl->ctrl;
 
 out_remove_admin_queue:
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        nvme_rdma_destroy_admin_queue(ctrl, true);
-out_kfree_queues:
-       kfree(ctrl->queues);
 out_uninit_ctrl:
        nvme_uninit_ctrl(&ctrl->ctrl);
        nvme_put_ctrl(&ctrl->ctrl);
        if (ret > 0)
                ret = -EIO;
        return ERR_PTR(ret);
+out_kfree_queues:
+       kfree(ctrl->queues);
 out_free_ctrl:
        kfree(ctrl);
        return ERR_PTR(ret);
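
The reordered sizing logic above makes the controller's SQ size, not the user's queue_size option, the source of truth, and the tag set now sizes itself from sqsize + 1. A worked example with made-up numbers:

/*
 * Illustrative values, not from the patch: the user asks for
 * queue_size 128, the target advertises sqsize + 1 == 64 and
 * MAXCMD == 32.
 *
 *   opts->queue_size (128) > sqsize + 1 (64)  -> warn only, no clamp
 *   sqsize + 1 (64)        > maxcmd (32)      -> sqsize = maxcmd - 1 = 31
 *   set->queue_depth       = sqsize + 1       = 32
 *
 * The tag set can never put more commands on the wire than the
 * controller accepts, and opts->queue_size is left untouched, so the
 * user's original request survives across reconnects.
 */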
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a03da764ecae8cb3ec9bf0bb9c68971669b895fc..74d4b785d2daac7d203108f06286221b5337f993 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -686,6 +686,14 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
        }
 
        ctrl->csts = NVME_CSTS_RDY;
+
+       /*
+        * Controllers that are not yet enabled should not really enforce the
+        * keep alive timeout, but we still want to track a timeout and cleanup
+        * in case a host died before it enabled the controller.  Hence, simply
+        * reset the keep alive timer when the controller is enabled.
+        */
+       mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 1da3d71e9f61f784e8131093bd5378d94bb98745..13948102ca298cf1a20d45d49781aa4dee55d851 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3592,7 +3592,7 @@ fc_bsg_job_timeout(struct request *req)
 
        /* the blk_end_sync_io() doesn't check the error */
        if (inflight)
-               blk_mq_complete_request(req);
+               __blk_complete_request(req);
        return BLK_EH_DONE;
 }
 
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 0bd432a4d7bd00ce376292720edd104d617c80c2..24251762c20c94edd238cfca1c1f55f0269d4e80 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -22,7 +22,6 @@ struct dentry;
  */
 enum wb_state {
        WB_registered,          /* bdi_register() was done */
-       WB_shutting_down,       /* wb_shutdown() in progress */
        WB_writeback_running,   /* Writeback is in progress */
        WB_has_dirty_io,        /* Dirty inodes on ->b_{dirty|io|more_io} */
        WB_start_all,           /* nr_pages == 0 (all) work pending */
@@ -189,6 +188,7 @@ struct backing_dev_info {
 #ifdef CONFIG_CGROUP_WRITEBACK
        struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
        struct rb_root cgwb_congested_tree; /* their congested states */
+       struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
 #else
        struct bdi_writeback_congested *wb_congested;
 #endif
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index 85a3fb65e40a6f3941337c7fad17e7fdff3b33d0..20d6cc91435df90f08741c478ab29ea85efa7167 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -53,6 +53,9 @@ enum {
 /* These are client behavior specific flags. */
 #define NBD_CFLAG_DESTROY_ON_DISCONNECT        (1 << 0) /* delete the nbd device on
                                                    disconnect. */
+#define NBD_CFLAG_DISCONNECT_ON_CLOSE (1 << 1) /* disconnect the nbd device on
+                                               *  close by last opener.
+                                               */
 
 /* userspace doesn't need the nbd_device structure */
 
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 347cc834c04a8cbc388af1b7594e4f09bdc68b41..2e5d3df0853d928021cba0e30c70ff682afeaf68 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -359,15 +359,8 @@ static void wb_shutdown(struct bdi_writeback *wb)
        spin_lock_bh(&wb->work_lock);
        if (!test_and_clear_bit(WB_registered, &wb->state)) {
                spin_unlock_bh(&wb->work_lock);
-               /*
-                * Wait for wb shutdown to finish if someone else is just
-                * running wb_shutdown(). Otherwise we could proceed to wb /
-                * bdi destruction before wb_shutdown() is finished.
-                */
-               wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE);
                return;
        }
-       set_bit(WB_shutting_down, &wb->state);
        spin_unlock_bh(&wb->work_lock);
 
        cgwb_remove_from_bdi_list(wb);
@@ -379,12 +372,6 @@ static void wb_shutdown(struct bdi_writeback *wb)
        mod_delayed_work(bdi_wq, &wb->dwork, 0);
        flush_delayed_work(&wb->dwork);
        WARN_ON(!list_empty(&wb->work_list));
-       /*
-        * Make sure bit gets cleared after shutdown is finished. Matches with
-        * the barrier provided by test_and_clear_bit() above.
-        */
-       smp_wmb();
-       clear_and_wake_up_bit(WB_shutting_down, &wb->state);
 }
 
 static void wb_exit(struct bdi_writeback *wb)
@@ -508,10 +495,12 @@ static void cgwb_release_workfn(struct work_struct *work)
        struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
                                                release_work);
 
+       mutex_lock(&wb->bdi->cgwb_release_mutex);
        wb_shutdown(wb);
 
        css_put(wb->memcg_css);
        css_put(wb->blkcg_css);
+       mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
        fprop_local_destroy_percpu(&wb->memcg_completions);
        percpu_ref_exit(&wb->refcnt);
@@ -697,6 +686,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 
        INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
        bdi->cgwb_congested_tree = RB_ROOT;
+       mutex_init(&bdi->cgwb_release_mutex);
 
        ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
        if (!ret) {
@@ -717,7 +707,10 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
        spin_lock_irq(&cgwb_lock);
        radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
                cgwb_kill(*slot);
+       spin_unlock_irq(&cgwb_lock);
 
+       mutex_lock(&bdi->cgwb_release_mutex);
+       spin_lock_irq(&cgwb_lock);
        while (!list_empty(&bdi->wb_list)) {
                wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
                                      bdi_node);
@@ -726,6 +719,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
                spin_lock_irq(&cgwb_lock);
        }
        spin_unlock_irq(&cgwb_lock);
+       mutex_unlock(&bdi->cgwb_release_mutex);
 }
 
 /**
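
The mm/backing-dev.c change replaces the WB_shutting_down bit with bdi->cgwb_release_mutex. A sketch of the ordering it enforces, derived from the hunks above (simplified):

/*
 *   cgwb_release_workfn():               cgwb_bdi_unregister():
 *     mutex_lock(&cgwb_release_mutex);     cgwb_kill() every cgwb
 *     wb_shutdown(wb);                     mutex_lock(&cgwb_release_mutex);
 *     css_put(...);                        wb_shutdown() remaining wbs
 *     mutex_unlock(&cgwb_release_mutex);   mutex_unlock(&cgwb_release_mutex);
 *
 * Any release work already in flight must finish its wb_shutdown()
 * before unregister can take the mutex, so the bdi can no longer be
 * torn down while a cgwb shutdown (and the final wb_workfn() flush) is
 * still running -- the window in which wb_workfn() could oops on a dead
 * bdi.  That also makes the WB_shutting_down bit and its wait_on_bit()
 * handshake unnecessary.
 */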