Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
author     Linus Torvalds <torvalds@linux-foundation.org>    Mon, 4 Jun 2018 22:54:04 +0000 (15:54 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>    Mon, 4 Jun 2018 22:54:04 +0000 (15:54 -0700)
Pull RCU updates from Ingo Molnar:

 - updates to the handling of expedited grace periods

 - updates to reduce lock contention in the rcu_node combining tree

   [ These are in preparation for the consolidation of RCU-bh,
     RCU-preempt, and RCU-sched into a single flavor, which was
     requested by Linus in response to a security flaw whose root cause
     included confusion between the multiple flavors of RCU; a short
     illustrative sketch of that kind of flavor mismatch follows this
     list ]

 - torture-test updates that save their users some time and effort

 - miscellaneous fixes
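
 As an aside, here is a minimal illustrative sketch of the kind of flavor
 mismatch referred to above (not code from this merge; the struct, variable,
 and function names are invented, and the fragment is meant to be compiled in
 kernel context rather than stand-alone).  Before the consolidation, a reader
 under rcu_read_lock_bh() is only guaranteed to be waited on by
 synchronize_rcu_bh(), not by synchronize_rcu():

   #include <linux/rcupdate.h>
   #include <linux/slab.h>
   #include <linux/spinlock.h>

   struct foo {
           int a;
   };

   static struct foo __rcu *gp;
   static DEFINE_SPINLOCK(gp_lock);

   static int foo_reader(void)
   {
           struct foo *p;
           int val = 0;

           rcu_read_lock_bh();             /* RCU-bh read-side critical section */
           p = rcu_dereference(gp);
           if (p)
                   val = p->a;
           rcu_read_unlock_bh();
           return val;
   }

   static void foo_updater(void)
   {
           struct foo *old;

           spin_lock(&gp_lock);
           old = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
           rcu_assign_pointer(gp, NULL);
           spin_unlock(&gp_lock);

           /*
            * Pre-consolidation bug pattern: synchronize_rcu() is not
            * guaranteed to wait for the rcu_read_lock_bh() reader above;
            * synchronize_rcu_bh() is what pairs with it.  After the
            * consolidation, plain synchronize_rcu() covers both.
            */
           synchronize_rcu();
           kfree(old);
   }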

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  rcu/x86: Provide early rcu_cpu_starting() callback
  torture: Make kvm-find-errors.sh find build warnings
  rcutorture: Abbreviate kvm.sh summary lines
  rcutorture: Print end-of-test state in kvm.sh summary
  rcutorture: Print end-of-test state
  torture: Fold parse-torture.sh into parse-console.sh
  torture: Add a script to edit output from failed runs
  rcu: Update list of rcu_future_grace_period() trace events
  rcu: Drop early GP request check from rcu_gp_kthread()
  rcu: Simplify and inline cpu_needs_another_gp()
  rcu: The rcu_gp_cleanup() function does not need cpu_needs_another_gp()
  rcu: Make rcu_start_this_gp() check for out-of-range requests
  rcu: Add funnel locking to rcu_start_this_gp()
  rcu: Make rcu_start_future_gp() caller select grace period
  rcu: Inline rcu_start_gp_advanced() into rcu_start_future_gp()
  rcu: Clear request other than RCU_GP_FLAG_INIT at GP end
  rcu: Cleanup, don't put ->completed into an int
  rcu: Switch __rcu_process_callbacks() to rcu_accelerate_cbs()
  rcu: Avoid __call_rcu_core() root rcu_node ->lock acquisition
  rcu: Make rcu_migrate_callbacks wake GP kthread when needed
  ...

drivers/nvme/host/core.c
include/linux/sched.h
kernel/sched/core.c

diff --combined drivers/nvme/host/core.c
index 04a20da767868e84f5eb9a24752dc0e02f5901aa,00f5aad34fbcf8c077a0bc89c69d64db027db46d..c8b30067b6aeb91de89ef5a3d0ab95f8c35a8c10
@@@ -101,15 -101,6 +101,15 @@@ static void nvme_ns_remove(struct nvme_
  static int nvme_revalidate_disk(struct gendisk *disk);
  static void nvme_put_subsystem(struct nvme_subsystem *subsys);
  
 +static void nvme_queue_scan(struct nvme_ctrl *ctrl)
 +{
 +      /*
 +       * Only new queue scan work when admin and IO queues are both alive
 +       */
 +      if (ctrl->state == NVME_CTRL_LIVE)
 +              queue_work(nvme_wq, &ctrl->scan_work);
 +}
 +
  int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
  {
        if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@@ -253,6 -244,9 +253,6 @@@ EXPORT_SYMBOL_GPL(nvme_complete_rq)
  
  void nvme_cancel_request(struct request *req, void *data, bool reserved)
  {
 -      if (!blk_mq_request_started(req))
 -              return;
 -
        dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
                                "Cancelling I/O %d", req->tag);
  
@@@ -357,7 -351,7 +357,7 @@@ static void nvme_free_ns_head(struct kr
        nvme_mpath_remove_disk(head);
        ida_simple_remove(&head->subsys->ns_ida, head->instance);
        list_del_init(&head->entry);
-       cleanup_srcu_struct(&head->srcu);
+       cleanup_srcu_struct_quiesced(&head->srcu);
        nvme_put_subsystem(head->subsys);
        kfree(head);
  }
@@@ -1039,21 -1033,6 +1039,21 @@@ int nvme_set_queue_count(struct nvme_ct
  }
  EXPORT_SYMBOL_GPL(nvme_set_queue_count);
  
 +#define NVME_AEN_SUPPORTED \
 +      (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
 +
 +static void nvme_enable_aen(struct nvme_ctrl *ctrl)
 +{
 +      u32 result;
 +      int status;
 +
 +      status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT,
 +                      ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result);
 +      if (status)
 +              dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
 +                       ctrl->oaes & NVME_AEN_SUPPORTED);
 +}
 +
  static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
  {
        struct nvme_user_io io;
@@@ -1372,19 -1351,13 +1372,19 @@@ static void nvme_set_chunk_size(struct 
        blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
  }
  
 -static void nvme_config_discard(struct nvme_ctrl *ctrl,
 -              unsigned stream_alignment, struct request_queue *queue)
 +static void nvme_config_discard(struct nvme_ns *ns)
  {
 +      struct nvme_ctrl *ctrl = ns->ctrl;
 +      struct request_queue *queue = ns->queue;
        u32 size = queue_logical_block_size(queue);
  
 -      if (stream_alignment)
 -              size *= stream_alignment;
 +      if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
 +              blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
 +              return;
 +      }
 +
 +      if (ctrl->nr_streams && ns->sws && ns->sgs)
 +              size *= ns->sws * ns->sgs;
  
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
        queue->limits.discard_alignment = 0;
        queue->limits.discard_granularity = size;
  
 +      /* If discard is already enabled, don't reset queue limits */
 +      if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
 +              return;
 +
        blk_queue_max_discard_sectors(queue, UINT_MAX);
        blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
 -      blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue);
  
        if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
                blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@@ -1441,6 -1411,10 +1441,6 @@@ static void nvme_update_disk_info(struc
  {
        sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
        unsigned short bs = 1 << ns->lba_shift;
 -      unsigned stream_alignment = 0;
 -
 -      if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
 -              stream_alignment = ns->sws * ns->sgs;
  
        blk_mq_freeze_queue(disk->queue);
        blk_integrity_unregister(disk);
                nvme_init_integrity(disk, ns->ms, ns->pi_type);
        if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
                capacity = 0;
 -      set_capacity(disk, capacity);
  
 -      if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
 -              nvme_config_discard(ns->ctrl, stream_alignment, disk->queue);
 +      set_capacity(disk, capacity);
 +      nvme_config_discard(ns);
        blk_mq_unfreeze_queue(disk->queue);
  }
  
@@@ -1472,8 -1447,8 +1472,8 @@@ static void __nvme_revalidate_disk(stru
        if (ns->lba_shift == 0)
                ns->lba_shift = 9;
        ns->noiob = le16_to_cpu(id->noiob);
 -      ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
        ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
 +      ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
        /* the PI implementation requires metadata equal t10 pi tuple size */
        if (ns->ms == sizeof(struct t10_pi_tuple))
                ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
@@@ -1602,7 -1577,7 +1602,7 @@@ static int nvme_pr_reserve(struct block
  static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
                enum pr_type type, bool abort)
  {
 -      u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
 +      u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
        return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
  }
  
@@@ -1614,7 -1589,7 +1614,7 @@@ static int nvme_pr_clear(struct block_d
  
  static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
  {
 -      u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
 +      u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
  }
  
@@@ -2208,8 -2183,7 +2208,8 @@@ static int nvme_init_subsystem(struct n
                 * Verify that the subsystem actually supports multiple
                 * controllers, else bail out.
                 */
 -              if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
 +              if (!ctrl->opts->discovery_nqn &&
 +                  nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
                        dev_err(ctrl->device,
                                "ignoring ctrl due to duplicate subnqn (%s).\n",
                                found->subnqn);
@@@ -2340,7 -2314,7 +2340,7 @@@ int nvme_init_identify(struct nvme_ctr
        if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
                ret = nvme_get_effects_log(ctrl);
                if (ret < 0)
 -                      return ret;
 +                      goto out_free;
        }
  
        if (!ctrl->identified) {
  
        ctrl->oacs = le16_to_cpu(id->oacs);
        ctrl->oncs = le16_to_cpup(&id->oncs);
 +      ctrl->oaes = le32_to_cpu(id->oaes);
        atomic_set(&ctrl->abort_limit, id->acl + 1);
        ctrl->vwc = id->vwc;
        ctrl->cntlid = le16_to_cpup(&id->cntlid);
@@@ -3197,42 -3170,6 +3197,42 @@@ static void nvme_scan_ns_sequential(str
        nvme_remove_invalid_namespaces(ctrl, nn);
  }
  
 +static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
 +{
 +      size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
 +      __le32 *log;
 +      int error, i;
 +      bool ret = false;
 +
 +      log = kzalloc(log_size, GFP_KERNEL);
 +      if (!log)
 +              return false;
 +
 +      error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
 +      if (error) {
 +              dev_warn(ctrl->device,
 +                      "reading changed ns log failed: %d\n", error);
 +              goto out_free_log;
 +      }
 +
 +      if (log[0] == cpu_to_le32(0xffffffff))
 +              goto out_free_log;
 +
 +      for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
 +              u32 nsid = le32_to_cpu(log[i]);
 +
 +              if (nsid == 0)
 +                      break;
 +              dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
 +              nvme_validate_ns(ctrl, nsid);
 +      }
 +      ret = true;
 +
 +out_free_log:
 +      kfree(log);
 +      return ret;
 +}
 +
  static void nvme_scan_work(struct work_struct *work)
  {
        struct nvme_ctrl *ctrl =
  
        WARN_ON_ONCE(!ctrl->tagset);
  
 +      if (test_and_clear_bit(EVENT_NS_CHANGED, &ctrl->events)) {
 +              if (nvme_scan_changed_ns_log(ctrl))
 +                      goto out_sort_namespaces;
 +              dev_info(ctrl->device, "rescanning namespaces.\n");
 +      }
 +
        if (nvme_identify_ctrl(ctrl, &id))
                return;
  
        if (ctrl->vs >= NVME_VS(1, 1, 0) &&
            !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
                if (!nvme_scan_ns_list(ctrl, nn))
 -                      goto done;
 +                      goto out_free_id;
        }
        nvme_scan_ns_sequential(ctrl, nn);
 - done:
 +out_free_id:
 +      kfree(id);
 +out_sort_namespaces:
        down_write(&ctrl->namespaces_rwsem);
        list_sort(NULL, &ctrl->namespaces, ns_cmp);
        up_write(&ctrl->namespaces_rwsem);
 -      kfree(id);
  }
  
 -void nvme_queue_scan(struct nvme_ctrl *ctrl)
 -{
 -      /*
 -       * Only new queue scan work when admin and IO queues are both alive
 -       */
 -      if (ctrl->state == NVME_CTRL_LIVE)
 -              queue_work(nvme_wq, &ctrl->scan_work);
 -}
 -EXPORT_SYMBOL_GPL(nvme_queue_scan);
 -
  /*
   * This function iterates the namespace list unlocked to allow recovery from
   * controller failure. It is up to the caller to ensure the namespace list is
@@@ -3382,23 -3322,8 +3382,23 @@@ static void nvme_fw_act_work(struct wor
        nvme_get_fw_slot_info(ctrl);
  }
  
 +static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 +{
 +      switch ((result & 0xff00) >> 8) {
 +      case NVME_AER_NOTICE_NS_CHANGED:
 +              set_bit(EVENT_NS_CHANGED, &ctrl->events);
 +              nvme_queue_scan(ctrl);
 +              break;
 +      case NVME_AER_NOTICE_FW_ACT_STARTING:
 +              queue_work(nvme_wq, &ctrl->fw_act_work);
 +              break;
 +      default:
 +              dev_warn(ctrl->device, "async event result %08x\n", result);
 +      }
 +}
 +
  void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 -              union nvme_result *res)
 +              volatile union nvme_result *res)
  {
        u32 result = le32_to_cpu(res->u32);
  
                return;
  
        switch (result & 0x7) {
 +      case NVME_AER_NOTICE:
 +              nvme_handle_aen_notice(ctrl, result);
 +              break;
        case NVME_AER_ERROR:
        case NVME_AER_SMART:
        case NVME_AER_CSS:
        default:
                break;
        }
 -
 -      switch (result & 0xff07) {
 -      case NVME_AER_NOTICE_NS_CHANGED:
 -              dev_info(ctrl->device, "rescanning\n");
 -              nvme_queue_scan(ctrl);
 -              break;
 -      case NVME_AER_NOTICE_FW_ACT_STARTING:
 -              queue_work(nvme_wq, &ctrl->fw_act_work);
 -              break;
 -      default:
 -              dev_warn(ctrl->device, "async event result %08x\n", result);
 -      }
        queue_work(nvme_wq, &ctrl->async_event_work);
  }
  EXPORT_SYMBOL_GPL(nvme_complete_async_event);
@@@ -3440,7 -3374,6 +3440,7 @@@ void nvme_start_ctrl(struct nvme_ctrl *
  
        if (ctrl->queue_count > 1) {
                nvme_queue_scan(ctrl);
 +              nvme_enable_aen(ctrl);
                queue_work(nvme_wq, &ctrl->async_event_work);
                nvme_start_queues(ctrl);
        }
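
A note on the nvme_pr_preempt()/nvme_pr_release() hunks above: the added
parentheses fix a C operator-precedence problem, because ?: binds more loosely
than | and <<.  A small stand-alone demonstration (illustrative only, not
kernel code; the values are made up):

   #include <stdio.h>

   int main(void)
   {
           unsigned int type = 0x3, abort = 1;

           /*
            * Without parentheses this parses as ((type << 8) | abort) ? 2 : 1,
            * so the reservation type is discarded and the result is just 2.
            */
           unsigned int buggy = type << 8 | abort ? 2 : 1;

           /* With parentheses the flag is OR-ed into the low bits as intended. */
           unsigned int fixed = type << 8 | (abort ? 2 : 1);

           printf("buggy=%#x fixed=%#x\n", buggy, fixed);  /* buggy=0x2 fixed=0x302 */
           return 0;
   }
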
diff --combined include/linux/sched.h
index ca3f3eae8980c3981509e016134064ac00fea83d,8e2a84615bb62879fd1ba75350518de7416a72f9..5a0c10b45273be3a70bd7d41d0364cf3bc35ad08
@@@ -1433,8 -1433,7 +1433,8 @@@ static inline bool is_percpu_thread(voi
  #define PFA_NO_NEW_PRIVS              0       /* May not gain new privileges. */
  #define PFA_SPREAD_PAGE                       1       /* Spread page cache over cpuset */
  #define PFA_SPREAD_SLAB                       2       /* Spread some slab caches over cpuset */
 -
 +#define PFA_SPEC_SSB_DISABLE          3       /* Speculative Store Bypass disabled */
 +#define PFA_SPEC_SSB_FORCE_DISABLE    4       /* Speculative Store Bypass force disabled*/
  
  #define TASK_PFA_TEST(name, func)                                     \
        static inline bool task_##func(struct task_struct *p)           \
@@@ -1459,13 -1458,6 +1459,13 @@@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab
  TASK_PFA_SET(SPREAD_SLAB, spread_slab)
  TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
  
 +TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
 +TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
 +TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
 +
 +TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 +TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 +
  static inline void
  current_restore_flags(unsigned long orig_flags, unsigned long flags)
  {
@@@ -1661,7 -1653,6 +1661,6 @@@ static inline int test_tsk_need_resched
   * explicit rescheduling in places that are safe. The return
   * value indicates whether a reschedule was done in fact.
   * cond_resched_lock() will drop the spinlock before scheduling,
-  * cond_resched_softirq() will enable bhs before scheduling.
   */
  #ifndef CONFIG_PREEMPT
  extern int _cond_resched(void);
@@@ -1681,13 -1672,6 +1680,6 @@@ extern int __cond_resched_lock(spinlock
        __cond_resched_lock(lock);                              \
  })
  
- extern int __cond_resched_softirq(void);
- #define cond_resched_softirq() ({                                     \
-       ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);     \
-       __cond_resched_softirq();                                       \
- })
  static inline void cond_resched_rcu(void)
  {
  #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
diff --combined kernel/sched/core.c
index 211890edf37e48feefea9f3f1166a3ba2c58270b,3e673703417571b31ee58e07bc560f0871307a2f..e27034bd954ea4e8dd8ad1780df1fccf856c3e3f
@@@ -881,33 -881,6 +881,33 @@@ void check_preempt_curr(struct rq *rq, 
  }
  
  #ifdef CONFIG_SMP
 +
 +static inline bool is_per_cpu_kthread(struct task_struct *p)
 +{
 +      if (!(p->flags & PF_KTHREAD))
 +              return false;
 +
 +      if (p->nr_cpus_allowed != 1)
 +              return false;
 +
 +      return true;
 +}
 +
 +/*
 + * Per-CPU kthreads are allowed to run on !active && online CPUs, see
 + * __set_cpus_allowed_ptr() and select_fallback_rq().
 + */
 +static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
 +{
 +      if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 +              return false;
 +
 +      if (is_per_cpu_kthread(p))
 +              return cpu_online(cpu);
 +
 +      return cpu_active(cpu);
 +}
 +
  /*
   * This is how migration works:
   *
@@@ -965,8 -938,16 +965,8 @@@ struct migration_arg 
  static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
                                 struct task_struct *p, int dest_cpu)
  {
 -      if (p->flags & PF_KTHREAD) {
 -              if (unlikely(!cpu_online(dest_cpu)))
 -                      return rq;
 -      } else {
 -              if (unlikely(!cpu_active(dest_cpu)))
 -                      return rq;
 -      }
 -
        /* Affinity changed (again). */
 -      if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
 +      if (!is_cpu_allowed(p, dest_cpu))
                return rq;
  
        update_rq_clock(rq);
@@@ -1495,9 -1476,10 +1495,9 @@@ static int select_fallback_rq(int cpu, 
        for (;;) {
                /* Any allowed, online CPU? */
                for_each_cpu(dest_cpu, &p->cpus_allowed) {
 -                      if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
 -                              continue;
 -                      if (!cpu_online(dest_cpu))
 +                      if (!is_cpu_allowed(p, dest_cpu))
                                continue;
 +
                        goto out;
                }
  
@@@ -1560,7 -1542,8 +1560,7 @@@ int select_task_rq(struct task_struct *
         * [ this allows ->select_task() to simply return task_cpu(p) and
         *   not worry about this generic constraint ]
         */
 -      if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
 -                   !cpu_online(cpu)))
 +      if (unlikely(!is_cpu_allowed(p, cpu)))
                cpu = select_fallback_rq(task_cpu(p), p);
  
        return cpu;
@@@ -5025,20 -5008,6 +5025,6 @@@ int __cond_resched_lock(spinlock_t *loc
  }
  EXPORT_SYMBOL(__cond_resched_lock);
  
- int __sched __cond_resched_softirq(void)
- {
-       BUG_ON(!in_softirq());
-       if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
-               local_bh_enable();
-               preempt_schedule_common();
-               local_bh_disable();
-               return 1;
-       }
-       return 0;
- }
- EXPORT_SYMBOL(__cond_resched_softirq);
  /**
   * yield - yield the current processor to other threads.
   *