Merge branch 'for-linus' of git://git.kernel.dk/linux-block
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Sep 2017 17:20:41 +0000 (10:20 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Sep 2017 17:20:41 +0000 (10:20 -0700)
Pull block fixes from Jens Axboe:
 "Small collection of fixes that would be nice to have in -rc1. This
  contains:

   - NVMe pull request from Christoph, mostly with fixes for nvme-pci,
     host memory buffer in particular.

   - Error handling fixup for cgwb_create(), in case allocation of 'wb'
     fails. From Christophe Jaillet.

   - Ensure that trace_block_getrq() gets the 'dev' in an appropriate
     fashion, to avoid a potential NULL deref. From Greg Thelen.

   - Regression fix for dm-mq with blk-mq, fixing a problem with
     stacking IO schedulers. From me.

   - string.h fixup, fixing an issue with memcpy_and_pad(). This
     original change came in through an NVMe dependency, which is why
     I'm including it here. From Martin Wilck.

   - Fix potential int overflow in __blkdev_sectors_to_bio_pages(), from
     Mikulas.

   - MBR enable fix for sed-opal, from Scott"

* 'for-linus' of git://git.kernel.dk/linux-block:
  block: directly insert blk-mq request from blk_insert_cloned_request()
  mm/backing-dev.c: fix an error handling path in 'cgwb_create()'
  string.h: un-fortify memcpy_and_pad
  nvme-pci: implement the HMB entry number and size limitations
  nvme-pci: propagate (some) errors from host memory buffer setup
  nvme-pci: use appropriate initial chunk size for HMB allocation
  nvme-pci: fix host memory buffer allocation fallback
  nvme: fix lightnvm check
  block: fix integer overflow in __blkdev_sectors_to_bio_pages()
  block: sed-opal: Set MBRDone on S3 resume path if TPER is MBREnabled
  block: tolerate tracing of NULL bio

14 files changed:
block/blk-core.c
block/blk-lib.c
block/blk-mq.c
block/blk-mq.h
block/opal_proto.h
block/sed-opal.c
drivers/nvme/host/core.c
drivers/nvme/host/lightnvm.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
include/linux/nvme.h
include/linux/string.h
include/trace/events/block.h
mm/backing-dev.c

index d709c0e3a2ac012c989549bd32ff47306a2ff2a5..aebe676225e6fdf360f39760cacf7326cf78cd9f 100644 (file)
@@ -2342,7 +2342,12 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
        if (q->mq_ops) {
                if (blk_queue_io_stat(q))
                        blk_account_io_start(rq, true);
-               blk_mq_sched_insert_request(rq, false, true, false, false);
+               /*
+                * Since we have a scheduler attached on the top device,
+                * bypass a potential scheduler on the bottom device for
+                * insert.
+                */
+               blk_mq_request_bypass_insert(rq);
                return BLK_STS_OK;
        }
 
index e01adb5145b3a0e82e17aa3f72c58074c6a05893..62240f8832ca6e62f8f3e5dddedb9428a978250d 100644 (file)
@@ -269,9 +269,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
  */
 static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
 {
-       sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1;
+       sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
 
-       return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES);
+       return min(pages, (sector_t)BIO_MAX_PAGES);
 }
 
 /**
index 3f18cff80050331ece90da34aec20819ae2e3245..98a18609755e94494b4239012f3c57c1503635bf 100644 (file)
@@ -1401,6 +1401,22 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
        blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
+/*
+ * Should only be used carefully, when the caller knows we want to
+ * bypass a potential IO scheduler on the target device.
+ */
+void blk_mq_request_bypass_insert(struct request *rq)
+{
+       struct blk_mq_ctx *ctx = rq->mq_ctx;
+       struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
+
+       spin_lock(&hctx->lock);
+       list_add_tail(&rq->queuelist, &hctx->dispatch);
+       spin_unlock(&hctx->lock);
+
+       blk_mq_run_hw_queue(hctx, false);
+}
+
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
                            struct list_head *list)
 
index 98252b79b80b65aef059f8376bce689928e21b6b..ef15b3414da548f4b5c8d2a438d8b52a54cf8e36 100644 (file)
@@ -54,6 +54,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
  */
 void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
                                bool at_head);
+void blk_mq_request_bypass_insert(struct request *rq);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
                                struct list_head *list);
 
index f40c9acf88955df7d23bbf669127fa0e5236d80a..e20be82588542918a0440ddc9bb1bde1d5bf5f8b 100644 (file)
@@ -46,6 +46,7 @@ enum opal_response_token {
 #define GENERIC_HOST_SESSION_NUM 0x41
 
 #define TPER_SYNC_SUPPORTED 0x01
+#define MBR_ENABLED_MASK 0x10
 
 #define TINY_ATOM_DATA_MASK 0x3F
 #define TINY_ATOM_SIGNED 0x40
index 9b30ae5ab843b0508494cae08a78ae831c67e58b..9ed51d0c6b1d171fc2eab785ef854b64f93721fe 100644 (file)
@@ -80,6 +80,7 @@ struct parsed_resp {
 
 struct opal_dev {
        bool supported;
+       bool mbr_enabled;
 
        void *data;
        sec_send_recv *send_recv;
@@ -283,6 +284,14 @@ static bool check_tper(const void *data)
        return true;
 }
 
+static bool check_mbrenabled(const void *data)
+{
+       const struct d0_locking_features *lfeat = data;
+       u8 sup_feat = lfeat->supported_features;
+
+       return !!(sup_feat & MBR_ENABLED_MASK);
+}
+
 static bool check_sum(const void *data)
 {
        const struct d0_single_user_mode *sum = data;
@@ -417,6 +426,7 @@ static int opal_discovery0_end(struct opal_dev *dev)
        u32 hlen = be32_to_cpu(hdr->length);
 
        print_buffer(dev->resp, hlen);
+       dev->mbr_enabled = false;
 
        if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
                pr_debug("Discovery length overflows buffer (%zu+%u)/%u\n",
@@ -442,6 +452,8 @@ static int opal_discovery0_end(struct opal_dev *dev)
                        check_geometry(dev, body);
                        break;
                case FC_LOCKING:
+                       dev->mbr_enabled = check_mbrenabled(body->features);
+                       break;
                case FC_ENTERPRISE:
                case FC_DATASTORE:
                        /* some ignored properties */
@@ -2190,6 +2202,21 @@ static int __opal_lock_unlock(struct opal_dev *dev,
        return next(dev);
 }
 
+static int __opal_set_mbr_done(struct opal_dev *dev, struct opal_key *key)
+{
+       u8 mbr_done_tf = 1;
+       const struct opal_step mbrdone_step [] = {
+               { opal_discovery0, },
+               { start_admin1LSP_opal_session, key },
+               { set_mbr_done, &mbr_done_tf },
+               { end_opal_session, },
+               { NULL, }
+       };
+
+       dev->steps = mbrdone_step;
+       return next(dev);
+}
+
 static int opal_lock_unlock(struct opal_dev *dev,
                            struct opal_lock_unlock *lk_unlk)
 {
@@ -2345,6 +2372,11 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
                                 suspend->unlk.session.sum);
                        was_failure = true;
                }
+               if (dev->mbr_enabled) {
+                       ret = __opal_set_mbr_done(dev, &suspend->unlk.session.opal_key);
+                       if (ret)
+                               pr_debug("Failed to set MBR Done in S3 resume\n");
+               }
        }
        mutex_unlock(&dev->dev_lock);
        return was_failure;
index 277a7a02cba5c53220220493c12d158134d131d5..acc816b67582f30524ad19f66843b071dfcef6ae 100644 (file)
@@ -1897,6 +1897,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                ctrl->cntlid = le16_to_cpu(id->cntlid);
                ctrl->hmpre = le32_to_cpu(id->hmpre);
                ctrl->hmmin = le32_to_cpu(id->hmmin);
+               ctrl->hmminds = le32_to_cpu(id->hmminds);
+               ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
        }
 
        kfree(id);
@@ -2377,10 +2379,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
 
-       if (nvme_nvm_ns_supported(ns, id) &&
-                               nvme_nvm_register(ns, disk_name, node)) {
-               dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
-               goto out_free_id;
+       if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
+               if (nvme_nvm_register(ns, disk_name, node)) {
+                       dev_warn(ctrl->device, "LightNVM init failure\n");
+                       goto out_free_id;
+               }
        }
 
        disk = alloc_disk_node(0, node);
index c1a28569e843c67f6e17c97bb1cf1497ff48b02d..1f79e3f141e64d50002d3c4c0459502e1463c0b7 100644 (file)
@@ -955,29 +955,3 @@ void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
        sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvm_dev_attr_group);
 }
-
-/* move to shared place when used in multiple places. */
-#define PCI_VENDOR_ID_CNEX 0x1d1d
-#define PCI_DEVICE_ID_CNEX_WL 0x2807
-#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f
-
-int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
-{
-       struct nvme_ctrl *ctrl = ns->ctrl;
-       /* XXX: this is poking into PCI structures from generic code! */
-       struct pci_dev *pdev = to_pci_dev(ctrl->dev);
-
-       /* QEMU NVMe simulator - PCI ID + Vendor specific bit */
-       if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
-                               pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
-                                                       id->vs[0] == 0x1)
-               return 1;
-
-       /* CNEX Labs - PCI ID + Vendor specific bit */
-       if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
-                               pdev->device == PCI_DEVICE_ID_CNEX_WL &&
-                                                       id->vs[0] == 0x1)
-               return 1;
-
-       return 0;
-}
index a19a587d60ed5ae2dece12c058db03fea9504430..d3f3c4447515703a26683580071bd6fd160cd102 100644 (file)
@@ -75,6 +75,11 @@ enum nvme_quirks {
         * The deepest sleep state should not be used.
         */
        NVME_QUIRK_NO_DEEPEST_PS                = (1 << 5),
+
+       /*
+        * Supports the LightNVM command set if indicated in vs[1].
+        */
+       NVME_QUIRK_LIGHTNVM                     = (1 << 6),
 };
 
 /*
@@ -176,8 +181,11 @@ struct nvme_ctrl {
        u64 ps_max_latency_us;
        bool apst_enabled;
 
+       /* PCIe only: */
        u32 hmpre;
        u32 hmmin;
+       u32 hmminds;
+       u16 hmmaxd;
 
        /* Fabrics only */
        u16 sqsize;
@@ -320,7 +328,6 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
 
 #ifdef CONFIG_NVM
-int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
 int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
 void nvme_nvm_unregister(struct nvme_ns *ns);
 int nvme_nvm_register_sysfs(struct nvme_ns *ns);
@@ -339,10 +346,6 @@ static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns)
        return 0;
 }
 static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {};
-static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
-{
-       return 0;
-}
 static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
                                                        unsigned long arg)
 {
index 198245faba6ba1052b90cdae4001ae920dbe11ce..4a2121335f48a0b4af31b413e04af8f9a6a52a2e 100644 (file)
@@ -1612,21 +1612,23 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
        dev->host_mem_descs = NULL;
 }
 
-static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
+static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
+               u32 chunk_size)
 {
        struct nvme_host_mem_buf_desc *descs;
-       u32 chunk_size, max_entries, len;
+       u32 max_entries, len;
        dma_addr_t descs_dma;
        int i = 0;
        void **bufs;
        u64 size = 0, tmp;
 
-       /* start big and work our way down */
-       chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
-retry:
        tmp = (preferred + chunk_size - 1);
        do_div(tmp, chunk_size);
        max_entries = tmp;
+
+       if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
+               max_entries = dev->ctrl.hmmaxd;
+
        descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
                        &descs_dma, GFP_KERNEL);
        if (!descs)
@@ -1650,15 +1652,9 @@ retry:
                i++;
        }
 
-       if (!size || (min && size < min)) {
-               dev_warn(dev->ctrl.device,
-                       "failed to allocate host memory buffer.\n");
+       if (!size)
                goto out_free_bufs;
-       }
 
-       dev_info(dev->ctrl.device,
-               "allocated %lld MiB host memory buffer.\n",
-               size >> ilog2(SZ_1M));
        dev->nr_host_mem_descs = i;
        dev->host_mem_size = size;
        dev->host_mem_descs = descs;
@@ -1679,21 +1675,35 @@ out_free_descs:
        dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
                        descs_dma);
 out:
-       /* try a smaller chunk size if we failed early */
-       if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
-               chunk_size /= 2;
-               goto retry;
-       }
        dev->host_mem_descs = NULL;
        return -ENOMEM;
 }
 
-static void nvme_setup_host_mem(struct nvme_dev *dev)
+static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
+{
+       u32 chunk_size;
+
+       /* start big and work our way down */
+       for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
+            chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
+            chunk_size /= 2) {
+               if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
+                       if (!min || dev->host_mem_size >= min)
+                               return 0;
+                       nvme_free_host_mem(dev);
+               }
+       }
+
+       return -ENOMEM;
+}
+
+static int nvme_setup_host_mem(struct nvme_dev *dev)
 {
        u64 max = (u64)max_host_mem_size_mb * SZ_1M;
        u64 preferred = (u64)dev->ctrl.hmpre * 4096;
        u64 min = (u64)dev->ctrl.hmmin * 4096;
        u32 enable_bits = NVME_HOST_MEM_ENABLE;
+       int ret = 0;
 
        preferred = min(preferred, max);
        if (min > max) {
@@ -1701,7 +1711,7 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
                        "min host memory (%lld MiB) above limit (%d MiB).\n",
                        min >> ilog2(SZ_1M), max_host_mem_size_mb);
                nvme_free_host_mem(dev);
-               return;
+               return 0;
        }
 
        /*
@@ -1715,12 +1725,21 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
        }
 
        if (!dev->host_mem_descs) {
-               if (nvme_alloc_host_mem(dev, min, preferred))
-                       return;
+               if (nvme_alloc_host_mem(dev, min, preferred)) {
+                       dev_warn(dev->ctrl.device,
+                               "failed to allocate host memory buffer.\n");
+                       return 0; /* controller must work without HMB */
+               }
+
+               dev_info(dev->ctrl.device,
+                       "allocated %lld MiB host memory buffer.\n",
+                       dev->host_mem_size >> ilog2(SZ_1M));
        }
 
-       if (nvme_set_host_mem(dev, enable_bits))
+       ret = nvme_set_host_mem(dev, enable_bits);
+       if (ret)
                nvme_free_host_mem(dev);
+       return ret;
 }
 
 static int nvme_setup_io_queues(struct nvme_dev *dev)
@@ -2164,8 +2183,11 @@ static void nvme_reset_work(struct work_struct *work)
                                 "unable to allocate dma for dbbuf\n");
        }
 
-       if (dev->ctrl.hmpre)
-               nvme_setup_host_mem(dev);
+       if (dev->ctrl.hmpre) {
+               result = nvme_setup_host_mem(dev);
+               if (result < 0)
+                       goto out;
+       }
 
        result = nvme_setup_io_queues(dev);
        if (result)
@@ -2497,6 +2519,10 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE(0x144d, 0xa822),   /* Samsung PM1725a */
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+       { PCI_DEVICE(0x1d1d, 0x1f1f),   /* LightNVM qemu device */
+               .driver_data = NVME_QUIRK_LIGHTNVM, },
+       { PCI_DEVICE(0x1d1d, 0x2807),   /* CNEX WL */
+               .driver_data = NVME_QUIRK_LIGHTNVM, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
index 5144f9103723e85662fa2581c44bf90756048e4d..87723c86f136f0e48c64a4c39fa9f0dcb3ad979d 100644 (file)
@@ -226,7 +226,9 @@ struct nvme_id_ctrl {
        __le16                  mntmt;
        __le16                  mxtmt;
        __le32                  sanicap;
-       __u8                    rsvd332[180];
+       __le32                  hmminds;
+       __le16                  hmmaxd;
+       __u8                    rsvd338[174];
        __u8                    sqes;
        __u8                    cqes;
        __le16                  maxcmd;
index e1eeb0a8a9693083e31bb5e45720764b46578158..54d21783e18dd12dd7e2ad563c2debc9e046143d 100644 (file)
@@ -434,20 +434,9 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q)
  * @count: The number of bytes to copy
  * @pad: Character to use for padding if space is left in destination.
  */
-__FORTIFY_INLINE void memcpy_and_pad(void *dest, size_t dest_len,
-                                    const void *src, size_t count, int pad)
+static inline void memcpy_and_pad(void *dest, size_t dest_len,
+                                 const void *src, size_t count, int pad)
 {
-       size_t dest_size = __builtin_object_size(dest, 0);
-       size_t src_size = __builtin_object_size(src, 0);
-
-       if (__builtin_constant_p(dest_len) && __builtin_constant_p(count)) {
-               if (dest_size < dest_len && dest_size < count)
-                       __write_overflow();
-               else if (src_size < dest_len && src_size < count)
-                       __read_overflow3();
-       }
-       if (dest_size < dest_len)
-               fortify_panic(__func__);
        if (dest_len > count) {
                memcpy(dest, src, count);
                memset(dest + count, pad,  dest_len - count);
index f815aaaef755af6dae63d71b154f5012e6a2a360..1fd7ff1a46f792d3a69a006e2406f86d87100cf3 100644 (file)
@@ -397,7 +397,6 @@ DECLARE_EVENT_CLASS(block_get_rq,
 
        TP_fast_assign(
                __entry->dev            = bio ? bio_dev(bio) : 0;
-               __entry->dev            = bio_dev(bio);
                __entry->sector         = bio ? bio->bi_iter.bi_sector : 0;
                __entry->nr_sector      = bio ? bio_sectors(bio) : 0;
                blk_fill_rwbs(__entry->rwbs,
@@ -414,7 +413,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
 /**
  * block_getrq - get a free request entry in queue for block IO operations
  * @q: queue for operations
- * @bio: pending block IO operation
+ * @bio: pending block IO operation (can be %NULL)
  * @rw: low bit indicates a read (%0) or a write (%1)
  *
  * A request struct for queue @q has been allocated to handle the
@@ -430,7 +429,7 @@ DEFINE_EVENT(block_get_rq, block_getrq,
 /**
  * block_sleeprq - waiting to get a free request entry in queue for block IO operation
  * @q: queue for operation
- * @bio: pending block IO operation
+ * @bio: pending block IO operation (can be %NULL)
  * @rw: low bit indicates a read (%0) or a write (%1)
  *
  * In the case where a request struct cannot be provided for queue @q
index f028a9a472fd9b2c7098bce8fe622fd58ba2f140..e19606bb41a0b5b48f6cbca8268eb529718da45c 100644 (file)
@@ -569,8 +569,10 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
        /* need to create a new one */
        wb = kmalloc(sizeof(*wb), gfp);
-       if (!wb)
-               return -ENOMEM;
+       if (!wb) {
+               ret = -ENOMEM;
+               goto out_put;
+       }
 
        ret = wb_init(wb, bdi, blkcg_css->id, gfp);
        if (ret)