Merge tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 11 Aug 2017 15:56:01 +0000 (08:56 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 11 Aug 2017 15:56:01 +0000 (08:56 -0700)
Pull powerpc fixes from Michael Ellerman:
 "All fixes for code that went in this cycle.

   - a revert of an optimisation to the syscall exit path, which could
     lead to an oops on either older machines or machines with > 1TB of
     memory

   - disable some deep idle states if the firmware configuration for
     them fails

   - re-enable HARD/SOFT lockup detectors in defconfigs after a Kconfig
     change

   - six fairly small patches fixing bugs in our new watchdog code

  Thanks to: Gautham R Shenoy, Nicholas Piggin"

* tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/watchdog: add locking around init/exit functions
  powerpc/watchdog: Fix marking of stuck CPUs
  powerpc/watchdog: Fix final-check recovered case
  powerpc/watchdog: Moderate touch_nmi_watchdog overhead
  powerpc/watchdog: Improve watchdog lock primitive
  powerpc: NMI IPI improve lock primitive
  powerpc/configs: Re-enable HARD/SOFT lockup detectors
  powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT states when stop-api fails
  Revert "powerpc/64: Avoid restore_math call if possible in syscall exit"

arch/powerpc/configs/powernv_defconfig
arch/powerpc/configs/ppc64_defconfig
arch/powerpc/configs/pseries_defconfig
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/process.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/watchdog.c
arch/powerpc/platforms/powernv/idle.c
drivers/cpuidle/cpuidle-powernv.c

diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 0695ce0..34fc9bb 100644
@@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 5175028..c5246d2 100644
@@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 1a61aa2..fd5d98a 100644
@@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 49d8422..e925c1c 100644
@@ -223,17 +223,27 @@ system_call_exit:
        andi.   r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
        bne-    .Lsyscall_exit_work
 
-       /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
-       li      r7,MSR_FP
+       andi.   r0,r8,MSR_FP
+       beq 2f
 #ifdef CONFIG_ALTIVEC
-       oris    r7,r7,MSR_VEC@h
+       andis.  r0,r8,MSR_VEC@h
+       bne     3f
 #endif
-       and     r0,r8,r7
-       cmpd    r0,r7
-       bne     .Lsyscall_restore_math
-.Lsyscall_restore_math_cont:
+2:     addi    r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+       li      r10,MSR_RI
+       mtmsrd  r10,1           /* Restore RI */
+#endif
+       bl      restore_math
+#ifdef CONFIG_PPC_BOOK3S
+       li      r11,0
+       mtmsrd  r11,1
+#endif
+       ld      r8,_MSR(r1)
+       ld      r3,RESULT(r1)
+       li      r11,-MAX_ERRNO
 
-       cmpld   r3,r11
+3:     cmpld   r3,r11
        ld      r5,_CCR(r1)
        bge-    .Lsyscall_error
 .Lsyscall_error_cont:
@@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        std     r5,_CCR(r1)
        b       .Lsyscall_error_cont
 
-.Lsyscall_restore_math:
-       /*
-        * Some initial tests from restore_math to avoid the heavyweight
-        * C code entry and MSR manipulations.
-        */
-       LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
-       and.    r0,r0,r8
-       bne     1f
-
-       ld      r7,PACACURRENT(r13)
-       lbz     r0,THREAD+THREAD_LOAD_FP(r7)
-#ifdef CONFIG_ALTIVEC
-       lbz     r6,THREAD+THREAD_LOAD_VEC(r7)
-       add     r0,r0,r6
-#endif
-       cmpdi   r0,0
-       beq     .Lsyscall_restore_math_cont
-
-1:     addi    r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
-       li      r10,MSR_RI
-       mtmsrd  r10,1           /* Restore RI */
-#endif
-       bl      restore_math
-#ifdef CONFIG_PPC_BOOK3S
-       li      r11,0
-       mtmsrd  r11,1
-#endif
-       /* Restore volatiles, reload MSR from updated one */
-       ld      r8,_MSR(r1)
-       ld      r3,RESULT(r1)
-       li      r11,-MAX_ERRNO
-       b       .Lsyscall_restore_math_cont
-
 /* Traced system call support */
 .Lsyscall_dotrace:
        bl      save_nvgprs
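
In C terms, the exit-path logic the revert restores is roughly this (a condensed
sketch of the CONFIG_ALTIVEC case; the asm above is authoritative):

        /* Call restore_math() unless the user MSR already has both FP and
         * VEC enabled, i.e. there is nothing left to restore.  restore_math()
         * may update regs->msr, which is why the asm reloads r8 from _MSR(r1)
         * afterwards.
         */
        if (!(regs->msr & MSR_FP) || !(regs->msr & MSR_VEC))
                restore_math(regs);

The removed .Lsyscall_restore_math block duplicated restore_math()'s early-exit
tests in asm to skip the C call; per the pull message above, that fast path could
oops on older machines or on machines with more than 1TB of memory, hence the
straight revert.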
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9f3e2c9..ec48096 100644
@@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs)
 {
        unsigned long msr;
 
-       /*
-        * Syscall exit makes a similar initial check before branching
-        * to restore_math. Keep them in synch.
-        */
        if (!msr_tm_active(regs->msr) &&
                !current->thread.load_fp && !loadvec(current->thread))
                return;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cf0e124..8d33205 100644
@@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
        hard_irq_disable();
        while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
                raw_local_irq_restore(*flags);
-               cpu_relax();
+               spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
                raw_local_irq_save(*flags);
                hard_irq_disable();
        }
@@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
 static void nmi_ipi_lock(void)
 {
        while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
-               cpu_relax();
+               spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
 }
 
 static void nmi_ipi_unlock(void)
@@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
        nmi_ipi_lock_start(&flags);
        while (nmi_ipi_busy_count) {
                nmi_ipi_unlock_end(&flags);
-               cpu_relax();
+               spin_until_cond(nmi_ipi_busy_count == 0);
                nmi_ipi_lock_start(&flags);
        }
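
spin_until_cond() is the spin-loop primitive these hunks adopt in place of a bare
cpu_relax() retry: waiters poll the condition with ordinary loads (on powerpc,
dropping and restoring SMT thread priority via HMT_low()/HMT_medium()) and only
re-attempt the atomic cmpxchg once the lock looks free, so the owner's cache line
is not bounced by a stream of failing read-modify-writes.  Quoted from memory,
the generic fallback reads roughly as follows (see include/linux/processor.h in
this tree for the authoritative definition):

        #define spin_until_cond(cond)                           \
        do {                                                    \
                if (unlikely(!(cond))) {                        \
                        spin_begin();                           \
                        do {                                    \
                                spin_cpu_relax();               \
                        } while (!(cond));                      \
                        spin_end();                             \
                }                                               \
        } while (0)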
 
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index b67f8b0..34721a2 100644
@@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
         * This may be called from low level interrupt handlers at some
         * point in future.
         */
-       local_irq_save(*flags);
-       while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
-               cpu_relax();
+       raw_local_irq_save(*flags);
+       hard_irq_disable(); /* Make it soft-NMI safe */
+       while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+               raw_local_irq_restore(*flags);
+               spin_until_cond(!test_bit(0, &__wd_smp_lock));
+               raw_local_irq_save(*flags);
+               hard_irq_disable();
+       }
 }
 
 static inline void wd_smp_unlock(unsigned long *flags)
 {
        clear_bit_unlock(0, &__wd_smp_lock);
-       local_irq_restore(*flags);
+       raw_local_irq_restore(*flags);
 }
 
 static void wd_lockup_ipi(struct pt_regs *regs)
@@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
                nmi_panic(regs, "Hard LOCKUP");
 }
 
-static void set_cpu_stuck(int cpu, u64 tb)
+static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
 {
-       cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
-       cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+       cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
+       cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
        if (cpumask_empty(&wd_smp_cpus_pending)) {
                wd_smp_last_reset_tb = tb;
                cpumask_andnot(&wd_smp_cpus_pending,
@@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
                                &wd_smp_cpus_stuck);
        }
 }
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+       set_cpumask_stuck(cpumask_of(cpu), tb);
+}
 
 static void watchdog_smp_panic(int cpu, u64 tb)
 {
@@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
        }
        smp_flush_nmi_ipi(1000000);
 
-       /* Take the stuck CPU out of the watch group */
-       for_each_cpu(c, &wd_smp_cpus_pending)
-               set_cpu_stuck(c, tb);
+       /* Take the stuck CPUs out of the watch group */
+       set_cpumask_stuck(&wd_smp_cpus_pending, tb);
 
-out:
        wd_smp_unlock(&flags);
 
        printk_safe_flush();
@@ -152,6 +159,11 @@ out:
 
        if (hardlockup_panic)
                nmi_panic(NULL, "Hard LOCKUP");
+
+       return;
+
+out:
+       wd_smp_unlock(&flags);
 }
 
 static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
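
Two fixes are visible in the hunks above.  First, marking stuck CPUs no longer
iterates wd_smp_cpus_pending with for_each_cpu() while set_cpu_stuck() clears
bits in (and, once it goes empty, refills) that same mask; set_cpumask_stuck()
updates the whole mask in one step.  Second, the recovered case: the condensed
shape of the function is now

        static void watchdog_smp_panic(int cpu, u64 tb)
        {
                unsigned long flags;

                wd_smp_lock(&flags);
                /* early bail-outs that take the goto sit above the hunk window */
                ...
                set_cpumask_stuck(&wd_smp_cpus_pending, tb);
                wd_smp_unlock(&flags);
                printk_safe_flush();
                if (hardlockup_panic)
                        nmi_panic(NULL, "Hard LOCKUP");
                return;                 /* normal path ends here */
        out:
                wd_smp_unlock(&flags);  /* recovered: unlock only */
        }

whereas before, the out: label sat in front of the unlock on the reporting path,
so a CPU that had recovered by the time the lock was taken would still fall
through into the flush-and-panic code.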
@@ -258,9 +270,11 @@ static void wd_timer_fn(unsigned long data)
 
 void arch_touch_nmi_watchdog(void)
 {
+       unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
        int cpu = smp_processor_id();
 
-       watchdog_timer_interrupt(cpu);
+       if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
+               watchdog_timer_interrupt(cpu);
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
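
The throttle arithmetic, worked through (values illustrative: POWER8/9 run the
timebase at 512MHz, so tb_ticks_per_usec == 512; a wd_timer_period_ms of 100 is
assumed here):

        /* ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000
         *       = 512 * 100 * 1000
         *       = 51,200,000 timebase ticks, i.e. 100 ms.
         * Code that calls touch_nmi_watchdog() in a tight loop now does the
         * relatively expensive watchdog_timer_interrupt() work at most once
         * per timer period instead of on every call.
         */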
 
@@ -283,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
 
 static int start_wd_on_cpu(unsigned int cpu)
 {
+       unsigned long flags;
+
        if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
                WARN_ON(1);
                return 0;
@@ -297,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu)
        if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
                return 0;
 
+       wd_smp_lock(&flags);
        cpumask_set_cpu(cpu, &wd_cpus_enabled);
        if (cpumask_weight(&wd_cpus_enabled) == 1) {
                cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
                wd_smp_last_reset_tb = get_tb();
        }
-       smp_wmb();
+       wd_smp_unlock(&flags);
+
        start_watchdog_timer_on(cpu);
 
        return 0;
@@ -310,12 +328,17 @@ static int start_wd_on_cpu(unsigned int cpu)
 
 static int stop_wd_on_cpu(unsigned int cpu)
 {
+       unsigned long flags;
+
        if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
                return 0; /* Can happen in CPU unplug case */
 
        stop_watchdog_timer_on(cpu);
 
+       wd_smp_lock(&flags);
        cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+       wd_smp_unlock(&flags);
+
        wd_smp_clear_cpu_pending(cpu, get_tb());
 
        return 0;
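
wd_cpus_enabled, wd_smp_cpus_pending and wd_smp_last_reset_tb form one consistent
unit under wd_smp_lock; the smp_wmb() this replaces ordered the writer's stores
but gave readers no matching barrier and no atomicity across the updates.  The
kind of window the lock closes, illustratively:

        /*
         *   CPU A: start_wd_on_cpu()                 CPU B: watchdog handler
         *   cpumask_set_cpu(cpu, &wd_cpus_enabled)
         *                                            sees the new CPU enabled
         *                                            before its pending bit and
         *                                            wd_smp_last_reset_tb are
         *                                            set up -> half-built state
         *   cpumask_set_cpu(cpu, &wd_smp_cpus_pending)
         *   wd_smp_last_reset_tb = get_tb()
         */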
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 2abee07..a553aee 100644
@@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
  */
 static u64 pnv_deepest_stop_psscr_val;
 static u64 pnv_deepest_stop_psscr_mask;
+static u64 pnv_deepest_stop_flag;
 static bool deepest_stop_found;
 
 static int pnv_save_sprs_for_deep_states(void)
@@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void)
 
        update_subcore_sibling_mask();
 
-       if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
-               pnv_save_sprs_for_deep_states();
+       if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+               int rc = pnv_save_sprs_for_deep_states();
+
+               if (likely(!rc))
+                       return;
+
+               /*
+                * The stop-api is unable to restore hypervisor
+                * resources on wakeup from platform idle states which
+                * lose full context. So disable such states.
+                */
+               supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+               pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+               pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+               if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+                   (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
+                       /*
+                        * Use the default stop state for CPU-Hotplug
+                        * if available.
+                        */
+                       if (default_stop_found) {
+                               pnv_deepest_stop_psscr_val =
+                                       pnv_default_stop_val;
+                               pnv_deepest_stop_psscr_mask =
+                                       pnv_default_stop_mask;
+                               pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+                                       pnv_deepest_stop_psscr_val);
+                       } else { /* Fallback to snooze loop for CPU-Hotplug */
+                               deepest_stop_found = false;
+                               pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+                       }
+               }
+       }
 }
 
 u32 pnv_get_supported_cpuidle_states(void)
@@ -375,7 +408,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
                                                pnv_deepest_stop_psscr_val;
                srr1 = power9_idle_stop(psscr);
 
-       } else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+       } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
+                  (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
                srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
        } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
                   (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
@@ -553,6 +587,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
                        max_residency_ns = residency_ns[i];
                        pnv_deepest_stop_psscr_val = psscr_val[i];
                        pnv_deepest_stop_psscr_mask = psscr_mask[i];
+                       pnv_deepest_stop_flag = flags[i];
                        deepest_stop_found = true;
                }
 
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 37b0698..42896a6 100644
@@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
        return -1;
 }
 
+extern u32 pnv_get_supported_cpuidle_states(void);
 static int powernv_add_idle_states(void)
 {
        struct device_node *power_mgt;
@@ -248,6 +249,8 @@ static int powernv_add_idle_states(void)
        const char *names[CPUIDLE_STATE_MAX];
        u32 has_stop_states = 0;
        int i, rc;
+       u32 supported_flags = pnv_get_supported_cpuidle_states();
+
 
        /* Currently we have snooze statically defined */
 
@@ -362,6 +365,13 @@ static int powernv_add_idle_states(void)
        for (i = 0; i < dt_idle_states; i++) {
                unsigned int exit_latency, target_residency;
                bool stops_timebase = false;
+
+               /*
+                * Skip the platform idle state whose flag isn't in
+                * the supported_cpuidle_states flag mask.
+                */
+               if ((flags[i] & supported_flags) != flags[i])
+                       continue;
                /*
                 * If an idle state has exit latency beyond
                 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
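
The skip test added above reads as "every flag this state sets must also be in
the supported mask".  Worked through with hypothetical flag values:

        /* supported_flags = OPAL_PM_NAP_ENABLED | OPAL_PM_SLEEP_ENABLED
         *
         * state X: flags[i] = OPAL_PM_SLEEP_ENABLED
         *          flags[i] & supported_flags == flags[i]      -> registered
         * state Y: flags[i] = OPAL_PM_LOSE_FULL_CONTEXT
         *          flags[i] & supported_flags == 0 != flags[i] -> skipped
         *
         * So states masked out of supported_cpuidle_states by the idle.c
         * change above never reach the cpuidle table.
         */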