Merge branch 'akpm' (patches from Andrew)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Jul 2017 19:38:49 +0000 (12:38 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Jul 2017 19:38:49 +0000 (12:38 -0700)
Merge yet more updates from Andrew Morton:

- various misc things

- kexec updates

- sysctl core updates

- scripts/gdb updates

- checkpoint-restart updates

- ipc updates

- kernel/watchdog updates

- Kees's "rough equivalent to the glibc _FORTIFY_SOURCE=1 feature"

- "stackprotector: ascii armor the stack canary"

- more MM bits

- checkpatch updates

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (96 commits)
  writeback: rework wb_[dec|inc]_stat family of functions
  ARM: samsung: usb-ohci: move inline before return type
  video: fbdev: omap: move inline before return type
  video: fbdev: intelfb: move inline before return type
  USB: serial: safe_serial: move __inline__ before return type
  drivers: tty: serial: move inline before return type
  drivers: s390: move static and inline before return type
  x86/efi: move asmlinkage before return type
  sh: move inline before return type
  MIPS: SMP: move asmlinkage before return type
  m68k: coldfire: move inline before return type
  ia64: sn: pci: move inline before type
  ia64: move inline before return type
  FRV: tlbflush: move asmlinkage before return type
  CRIS: gpio: move inline before return type
  ARM: HP Jornada 7XX: move inline before return type
  ARM: KVM: move asmlinkage before type
  checkpatch: improve the STORAGE_CLASS test
  mm, migration: do not trigger OOM killer when migrating memory
  drm/i915: use __GFP_RETRY_MAYFAIL
  ...

155 files changed:
Documentation/DMA-ISA-LPC.txt
Documentation/fault-injection/fault-injection.txt
Documentation/filesystems/proc.txt
Documentation/kdump/kdump.txt
arch/Kconfig
arch/arm/include/asm/kvm_hyp.h
arch/arm/mach-sa1100/jornada720_ssp.c
arch/arm64/Kconfig
arch/arm64/include/asm/stackprotector.h
arch/arm64/include/asm/string.h
arch/arm64/mm/mmap.c
arch/blackfin/include/asm/nmi.h
arch/blackfin/kernel/nmi.c
arch/cris/arch-v10/drivers/gpio.c
arch/frv/include/asm/tlbflush.h
arch/ia64/kernel/machine_kexec.c
arch/ia64/kernel/mca.c
arch/ia64/sn/pci/pcibr/pcibr_ate.c
arch/ia64/sn/pci/tioce_provider.c
arch/m68k/coldfire/intc-simr.c
arch/mips/include/asm/pgalloc.h
arch/mips/include/asm/smp.h
arch/mn10300/include/asm/nmi.h
arch/mn10300/kernel/mn10300-watchdog-low.S
arch/mn10300/kernel/mn10300-watchdog.c
arch/powerpc/Kconfig
arch/powerpc/include/asm/book3s/64/pgalloc.h
arch/powerpc/include/asm/nmi.h
arch/powerpc/include/asm/smp.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/fadump.c
arch/powerpc/kernel/kvm.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/watchdog.c [new file with mode: 0644]
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/lib/feature-fixups.c
arch/powerpc/mm/mmap.c
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/setup.c
arch/sh/include/asm/bug.h
arch/sh/include/asm/stackprotector.h
arch/sh/mm/cache-sh5.c
arch/sparc/include/asm/nmi.h
arch/sparc/kernel/mdesc.c
arch/sparc/kernel/nmi.c
arch/x86/Kconfig
arch/x86/boot/compressed/misc.c
arch/x86/include/asm/efi.h
arch/x86/include/asm/stackprotector.h
arch/x86/include/asm/string_32.h
arch/x86/include/asm/string_64.h
arch/x86/kernel/apic/hw_nmi.c
arch/x86/kernel/crash.c
arch/x86/lib/memcpy_32.c
arch/x86/mm/mmap.c
arch/x86/xen/mmu_pv.c
drivers/char/random.c
drivers/firmware/efi/libstub/Makefile
drivers/gpu/drm/i915/i915_gem.c
drivers/infiniband/sw/rxe/rxe_resp.c
drivers/mmc/host/wbsd.c
drivers/s390/char/vmcp.c
drivers/s390/net/ctcm_main.c
drivers/s390/net/qeth_l3_main.c
drivers/target/target_core_transport.c
drivers/tty/serial/ioc3_serial.c
drivers/tty/serial/ioc4_serial.c
drivers/usb/serial/safe_serial.c
drivers/vhost/net.c
drivers/vhost/scsi.c
drivers/vhost/vsock.c
drivers/video/fbdev/intelfb/intelfbdrv.c
drivers/video/fbdev/omap/lcdc.c
fs/Kconfig
fs/bfs/inode.c
fs/eventpoll.c
fs/fs-writeback.c
fs/proc/base.c
fs/proc/proc_sysctl.c
fs/xfs/kmem.h
include/linux/backing-dev.h
include/linux/crash_core.h
include/linux/dcache.h
include/linux/eventpoll.h
include/linux/gfp.h
include/linux/ipc.h
include/linux/kernel.h
include/linux/kexec.h
include/linux/migrate.h
include/linux/nmi.h
include/linux/platform_data/usb-ohci-s3c2410.h
include/linux/random.h
include/linux/sched.h
include/linux/sem.h
include/linux/slab.h
include/linux/string.h
include/linux/sysctl.h
include/trace/events/mmflags.h
include/uapi/linux/kcmp.h
include/uapi/linux/sem.h
init/main.c
ipc/msg.c
ipc/sem.c
ipc/shm.c
ipc/util.c
ipc/util.h
kernel/Makefile
kernel/crash_core.c
kernel/fork.c
kernel/kcmp.c
kernel/kexec.c
kernel/kexec_core.c
kernel/kexec_file.c
kernel/kexec_internal.h
kernel/ksysfs.c
kernel/sysctl.c
kernel/sysctl_binary.c
kernel/watchdog.c
kernel/watchdog_hld.c
lib/Kconfig.debug
lib/Makefile
lib/fault-inject.c
lib/percpu_counter.c
lib/string.c
lib/test_sysctl.c [new file with mode: 0644]
mm/hugetlb.c
mm/internal.h
mm/memory.c
mm/mempolicy.c
mm/page-writeback.c
mm/page_alloc.c
mm/sparse-vmemmap.c
mm/util.c
mm/vmalloc.c
mm/vmscan.c
net/core/dev.c
net/core/skbuff.c
net/netfilter/x_tables.c
net/sched/sch_fq.c
samples/kfifo/dma-example.c
scripts/checkpatch.pl
scripts/gdb/linux/constants.py.in
scripts/gdb/linux/dmesg.py
scripts/gdb/linux/proc.py
security/Kconfig
tools/perf/builtin-kmem.c
tools/testing/selftests/sysctl/Makefile
tools/testing/selftests/sysctl/common_tests [deleted file]
tools/testing/selftests/sysctl/config [new file with mode: 0644]
tools/testing/selftests/sysctl/run_numerictests [deleted file]
tools/testing/selftests/sysctl/run_stringtests [deleted file]
tools/testing/selftests/sysctl/sysctl.sh [new file with mode: 0644]

index c413313987521fdb9d95522cd9573f19af6cd97b..7a065ac4a9d1cae2fa3acc2620ca29aaeb8c3ed5 100644 (file)
@@ -42,7 +42,7 @@ requirements you pass the flag GFP_DMA to kmalloc.
 
 Unfortunately the memory available for ISA DMA is scarce so unless you
 allocate the memory during boot-up it's a good idea to also pass
-__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder.
+__GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder.
 
 (This scarcity also means that you should allocate the buffer as
 early as possible and not release it until the driver is unloaded.)
index 415484f3d59a248f587a9d92a315fc373c8b15f5..192d8cbcc5f998f47c76bfca653b61201f7fe160 100644 (file)
@@ -134,6 +134,22 @@ use the boot option:
        fail_futex=
        mmc_core.fail_request=<interval>,<probability>,<space>,<times>
 
+o proc entries
+
+- /proc/self/task/<current-tid>/fail-nth:
+
+       Writing an integer N to this file makes the N-th call in the current
+       task fail (N is 0-based). Reading it back returns a single character,
+       'Y' or 'N', which says whether the fault set up by the previous write
+       was injected, and disables the fault if it was not yet injected.
+       Note that this file enables all types of faults (slab, futex, etc).
+       This setting takes precedence over all other generic debugfs settings
+       like probability, interval, times, etc. But per-capability settings
+       (e.g. fail_futex/ignore-private) take precedence over it.
+
+       This feature is intended for systematic testing of faults in a single
+       system call. See an example below.
+
 How to add new fault injection capability
 -----------------------------------------
 
@@ -278,3 +294,65 @@ allocation failure.
        # env FAILCMD_TYPE=fail_page_alloc \
                ./tools/testing/fault-injection/failcmd.sh --times=100 \
                 -- make -C tools/testing/selftests/ run_tests
+
+Systematic faults using fail-nth
+---------------------------------
+
+The following code systematically injects the 0-th, 1-st, 2-nd and so on
+faults into the socketpair() system call.
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+
+int main()
+{
+       int i, err, res, fail_nth, fds[2];
+       char buf[128];
+
+       system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
+       sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
+       fail_nth = open(buf, O_RDWR);
+       for (i = 0;; i++) {
+               sprintf(buf, "%d", i);
+               write(fail_nth, buf, strlen(buf));
+               res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
+               err = errno;
+               read(fail_nth, buf, 1);
+               if (res == 0) {
+                       close(fds[0]);
+                       close(fds[1]);
+               }
+               printf("%d-th fault %c: res=%d/%d\n", i, buf[0], res, err);
+               if (buf[0] != 'Y')
+                       break;
+       }
+       return 0;
+}
+
+An example output:
+
+0-th fault Y: res=-1/23
+1-th fault Y: res=-1/23
+2-th fault Y: res=-1/23
+3-th fault Y: res=-1/12
+4-th fault Y: res=-1/12
+5-th fault Y: res=-1/23
+6-th fault Y: res=-1/23
+7-th fault Y: res=-1/23
+8-th fault Y: res=-1/12
+9-th fault Y: res=-1/12
+10-th fault Y: res=-1/12
+11-th fault Y: res=-1/12
+12-th fault Y: res=-1/12
+13-th fault Y: res=-1/12
+14-th fault Y: res=-1/12
+15-th fault Y: res=-1/12
+16-th fault N: res=0/12
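
A note on the sample output above: the second number in each res= pair is the
errno observed by userspace; on Linux, 12 is ENOMEM and 23 is ENFILE. A minimal,
standalone sketch (plain userspace C, nothing kernel-specific) for translating
such values with strerror():

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* errno values that appear in the res= column of the sample output */
	int errs[] = { 12, 23 };
	unsigned int i;

	for (i = 0; i < sizeof(errs) / sizeof(errs[0]); i++)
		printf("errno %d: %s\n", errs[i], strerror(errs[i]));
	return 0;
}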
index 4cddbce85ac9ad175743f5f4384a32937bf87888..adba21b5ada7b53fb9d0811039afba37141ed099 100644 (file)
@@ -1786,12 +1786,16 @@ pair provide additional information particular to the objects they represent.
        pos:    0
        flags:  02
        mnt_id: 9
-       tfd:        5 events:       1d data: ffffffffffffffff
+       tfd:        5 events:       1d data: ffffffffffffffff pos:0 ino:61af sdev:7
 
        where 'tfd' is a target file descriptor number in decimal form,
        'events' is events mask being watched and the 'data' is data
        associated with a target [see epoll(7) for more details].
 
+       The 'pos' is the current offset of the target file in decimal form
+       [see lseek(2)]; 'ino' and 'sdev' are the inode number of the target
+       file and the device it resides on, both in hex format.
+
        Fsnotify files
        ~~~~~~~~~~~~~~
        For inotify files the format is the following
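
A quick way to see the extended 'tfd' line in practice is to register any
epoll-able descriptor and read the epoll fd's fdinfo back. A minimal sketch
(userspace C; the pipe is just a convenient target, and the printed values
will of course differ):

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

int main(void)
{
	char path[64], line[256];
	struct epoll_event ev = { .events = EPOLLIN };
	int pipefd[2], epfd;
	FILE *f;

	if (pipe(pipefd) || (epfd = epoll_create1(0)) < 0)
		return 1;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, pipefd[0], &ev))
		return 1;

	/* the tfd: line for the pipe read end now also carries pos, ino, sdev */
	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", epfd);
	f = fopen(path, "r");
	while (f && fgets(line, sizeof(line), f))
		fputs(line, stdout);
	return 0;
}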
index 615434d81108ed28fc4f077a0e1b35cd609b2610..51814450a7f8031d6bb13dcc32f0d137fe1f1897 100644 (file)
@@ -112,8 +112,8 @@ There are two possible methods of using Kdump.
 2) Or use the system kernel binary itself as dump-capture kernel and there is
    no need to build a separate dump-capture kernel. This is possible
    only with the architectures which support a relocatable kernel. As
-   of today, i386, x86_64, ppc64, ia64 and arm architectures support relocatable
-   kernel.
+   of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support
+   relocatable kernel.
 
 Building a relocatable kernel is advantageous from the point of view that
 one does not have to build a second kernel for capturing the dump. But
@@ -339,7 +339,7 @@ For arm:
 For arm64:
        - Use vmlinux or Image
 
-If you are using a uncompressed vmlinux image then use following command
+If you are using an uncompressed vmlinux image, then use the following command
 to load dump-capture kernel.
 
    kexec -p <dump-capture-kernel-vmlinux-image> \
@@ -361,6 +361,12 @@ to load dump-capture kernel.
    --dtb=<dtb-for-dump-capture-kernel> \
    --append="root=<root-dev> <arch-specific-options>"
 
+If you are using an uncompressed Image, then use the following command
+to load dump-capture kernel.
+
+   kexec -p <dump-capture-kernel-Image> \
+   --initrd=<initrd-for-dump-capture-kernel> \
+   --append="root=<root-dev> <arch-specific-options>"
 
 Please note, that --args-linux does not need to be specified for ia64.
 It is planned to make this a no-op on that architecture, but for now
index cae0958a2298d4f5d6bc4f498ece8350a986fc03..21d0089117fe957be2e32f97d5ab92b3086abd1c 100644 (file)
@@ -198,9 +198,6 @@ config HAVE_KPROBES_ON_FTRACE
 config HAVE_NMI
        bool
 
-config HAVE_NMI_WATCHDOG
-       depends on HAVE_NMI
-       bool
 #
 # An arch should select this if it provides all these things:
 #
@@ -226,6 +223,12 @@ config GENERIC_SMP_IDLE_THREAD
 config GENERIC_IDLE_POLL_SETUP
        bool
 
+config ARCH_HAS_FORTIFY_SOURCE
+       bool
+       help
+         An architecture should select this when it can successfully
+         build and run with CONFIG_FORTIFY_SOURCE.
+
 # Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h
 config ARCH_HAS_SET_MEMORY
        bool
@@ -288,6 +291,28 @@ config HAVE_PERF_EVENTS_NMI
          subsystem.  Also has support for calculating CPU cycle events
          to determine how many clock cycles in a given period.
 
+config HAVE_HARDLOCKUP_DETECTOR_PERF
+       bool
+       depends on HAVE_PERF_EVENTS_NMI
+       help
+         The arch chooses to use the generic perf-NMI-based hardlockup
+         detector. Must define HAVE_PERF_EVENTS_NMI.
+
+config HAVE_NMI_WATCHDOG
+       depends on HAVE_NMI
+       bool
+       help
+         The arch provides a low level NMI watchdog. It provides
+         asm/nmi.h, and defines its own arch_touch_nmi_watchdog().
+
+config HAVE_HARDLOCKUP_DETECTOR_ARCH
+       bool
+       select HAVE_NMI_WATCHDOG
+       help
+         The arch chooses to provide its own hardlockup detector, which is
+         a superset of HAVE_NMI_WATCHDOG. It also conforms to the config
+         interfaces and parameters provided by the hardlockup detector subsystem.
+
 config HAVE_PERF_REGS
        bool
        help
index 58508900c4bb264be2a874299b829b31a9cb5601..14b5903f022461a2f090ad16ff7880e13cea04d3 100644 (file)
@@ -110,8 +110,8 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
 
-void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp);
-void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp);
+asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
+asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
 static inline bool __vfp_enabled(void)
 {
        return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10)));
@@ -120,8 +120,8 @@ static inline bool __vfp_enabled(void)
 void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt);
 void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt);
 
-int asmlinkage __guest_enter(struct kvm_vcpu *vcpu,
+asmlinkage int __guest_enter(struct kvm_vcpu *vcpu,
                             struct kvm_cpu_context *host);
-int asmlinkage __hyp_do_panic(const char *, int, u32);
+asmlinkage int __hyp_do_panic(const char *, int, u32);
 
 #endif /* __ARM_KVM_HYP_H__ */
index b143c4659346d841d7249fc358b12272061e71af..7fc11a3c17b4cff538e1bcfbd242f1096d39acb6 100644 (file)
@@ -33,7 +33,7 @@ static unsigned long jornada_ssp_flags;
  * we need to reverse all data we receive from the mcu due to its physical location
  * returns : 01110111 -> 11101110
  */
-u8 inline jornada_ssp_reverse(u8 byte)
+inline u8 jornada_ssp_reverse(u8 byte)
 {
        return
                ((0x80 & byte) >> 7) |
index 8addb851ab5e3288b1a3c89dc6128cdf63779613..dfd908630631a30851a8f573e41dbe8b21af5f19 100644 (file)
@@ -12,6 +12,7 @@ config ARM64
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
        select ARCH_HAS_ELF_RANDOMIZE
+       select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
        select ARCH_HAS_KCOV
index fe5e287dc56b7263f40146208956ee203bcd080e..b86a0865ddf16a020ca2e5cf727d92878860d4d9 100644 (file)
@@ -30,6 +30,7 @@ static __always_inline void boot_init_stack_canary(void)
        /* Try to get a semi random initial value. */
        get_random_bytes(&canary, sizeof(canary));
        canary ^= LINUX_VERSION_CODE;
+       canary &= CANARY_MASK;
 
        current->stack_canary = canary;
        __stack_chk_guard = current->stack_canary;
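
The new mask is the "ascii armor" named in the series summary above: one byte
of the canary is forced to zero, so a string copy overflowing into the canary
cannot reproduce it without embedding a terminating NUL, and string-based reads
stop before leaking the rest of it. A toy illustration of the masking step (the
value of DEMO_CANARY_MASK is purely hypothetical; the real CANARY_MASK comes
from elsewhere in this series and depends on word size and endianness):

#include <stdio.h>

/* hypothetical mask that zeroes one byte of a 64-bit canary */
#define DEMO_CANARY_MASK 0xffffffffffffff00UL

int main(void)
{
	unsigned long canary = 0x1122334455667788UL;

	canary &= DEMO_CANARY_MASK;	/* that byte becomes the NUL "armor" */
	printf("armored canary: %#018lx\n", canary);
	return 0;
}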
index 2eb714c4639f5669b1012aae0c77ca74fdb72881..d0aa42907569beb95142f205c03bc1f21b9bae88 100644 (file)
@@ -63,6 +63,11 @@ extern int memcmp(const void *, const void *, size_t);
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
 #define memmove(dst, src, len) __memmove(dst, src, len)
 #define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
 #endif
 
 #endif
index adc208c2ae9c0f6c99e0f28c32a668ae0db72441..decccffb03cac60abe6e5628e2d0f3a47cc7d099 100644 (file)
@@ -35,7 +35,7 @@
  * Leave enough space between the mmap area and the stack to honour ulimit in
  * the face of randomisation.
  */
-#define MIN_GAP (SZ_128M + ((STACK_RND_MASK << PAGE_SHIFT) + 1))
+#define MIN_GAP (SZ_128M)
 #define MAX_GAP        (STACK_TOP/6*5)
 
 static int mmap_is_legacy(void)
@@ -65,6 +65,11 @@ unsigned long arch_mmap_rnd(void)
 static unsigned long mmap_base(unsigned long rnd)
 {
        unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
+
+       /* Values close to RLIM_INFINITY can overflow. */
+       if (gap + pad > gap)
+               gap += pad;
 
        if (gap < MIN_GAP)
                gap = MIN_GAP;
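
The guard in this hunk relies on unsigned wrap-around: if gap + pad overflows,
the sum wraps and ends up smaller than gap, so the padding is only applied when
the addition is safe. A standalone sketch of the same check (the values are
arbitrary):

#include <stdio.h>

int main(void)
{
	unsigned long gap = ~0UL - 4;	/* stands in for a near-RLIM_INFINITY stack limit */
	unsigned long pad = 1024;

	if (gap + pad > gap)		/* false here: the unsigned sum wrapped around */
		gap += pad;

	printf("gap = %lu (pad was not applied)\n", gap);
	return 0;
}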
index b9caac4fcfd854749a24ab7831d95edbd7d1d9b7..107d23705f465db6b363b10c5bff46203b2205f0 100644 (file)
@@ -9,4 +9,6 @@
 
 #include <linux/nmi.h>
 
+extern void arch_touch_nmi_watchdog(void);
+
 #endif
index 633c37083e877f3aa930c40acd197876d52834f7..1e714329fe8a186f59f7a6bf25de7393a14fb403 100644 (file)
@@ -190,7 +190,7 @@ static int __init init_nmi_wdt(void)
 }
 device_initcall(init_nmi_wdt);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
        atomic_set(&nmi_touched[smp_processor_id()], 1);
 }
index 64285e0d348154c461bf79553b964417751a1126..dfd3b3ba5e4ec438fdda67c287bad41ff0d31352 100644 (file)
@@ -399,7 +399,7 @@ out:
 /* Main device API. ioctl's to read/set/clear bits, as well as to
  * set alarms to wait for using a subsequent select().
  */
-unsigned long inline setget_input(struct gpio_private *priv, unsigned long arg)
+inline unsigned long setget_input(struct gpio_private *priv, unsigned long arg)
 {
        /* Set direction 0=unchanged 1=input,
         * return mask with 1=input */
@@ -450,7 +450,7 @@ unsigned long inline setget_input(struct gpio_private *priv, unsigned long arg)
        return dir_g_in_bits;
 } /* setget_input */
 
-unsigned long inline setget_output(struct gpio_private *priv, unsigned long arg)
+inline unsigned long setget_output(struct gpio_private *priv, unsigned long arg)
 {
        if (USE_PORTS(priv)) {
                *priv->dir = *priv->dir_shadow |=
index 7ac5eafc5d9839c34c23c459075aa8c8b450629c..75879420f578e4fe4b23ab945b3ff3fc79426895 100644 (file)
 #ifdef CONFIG_MMU
 
 #ifndef __ASSEMBLY__
-extern void asmlinkage __flush_tlb_all(void);
-extern void asmlinkage __flush_tlb_mm(unsigned long contextid);
-extern void asmlinkage __flush_tlb_page(unsigned long contextid, unsigned long start);
-extern void asmlinkage __flush_tlb_range(unsigned long contextid,
+extern asmlinkage void __flush_tlb_all(void);
+extern asmlinkage void __flush_tlb_mm(unsigned long contextid);
+extern asmlinkage void __flush_tlb_page(unsigned long contextid, unsigned long start);
+extern asmlinkage void __flush_tlb_range(unsigned long contextid,
                                         unsigned long start, unsigned long end);
 #endif /* !__ASSEMBLY__ */
 
index 599507bcec91fc577147cf97d894fe7f6dd11677..c14815dca747dead3c608a4bf1a5122165aaf2b7 100644 (file)
@@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
 #endif
 }
 
-phys_addr_t paddr_vmcoreinfo_note(void)
-{
-       return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
-}
-
index 79c7c46d7dc175eaef91528c50cc70d16c750b6a..555b11180156086bd394389b1036bac8e5be995b 100644 (file)
@@ -334,7 +334,7 @@ static void ia64_mlogbuf_dump_from_init(void)
        ia64_mlogbuf_dump();
 }
 
-static void inline
+static inline void
 ia64_mca_spin(const char *func)
 {
        if (monarch_cpu == smp_processor_id())
index 5bc34eac9e0193b416d3d22690a75d6a63b7c719..b67bb4cb73ff7ec35479788c382c718b088409a3 100644 (file)
@@ -140,7 +140,7 @@ static inline u64 __iomem *pcibr_ate_addr(struct pcibus_info *pcibus_info,
 /*
  * Update the ate.
  */
-void inline
+inline void
 ate_write(struct pcibus_info *pcibus_info, int ate_index, int count,
          volatile u64 ate)
 {
index 46d3df4b03a160f4462e9282626661f981a76ee7..3bd9abc35485ed4cd2c4ba4ffa51b974b4044f4c 100644 (file)
@@ -52,7 +52,7 @@
  * All registers defined in struct tioce will meet that criteria.
  */
 
-static void inline
+static inline void
 tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
 {
        u64 mmr_base;
@@ -78,7 +78,7 @@ tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
        }
 }
 
-static void inline
+static inline void
 tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr)
 {
        u64 mmr_base;
index 7cf2c156f72d2f5fa045ae1fcbc033a5f34cdec0..15c4b7a6e38f4c5759331fb8067671b4cf695371 100644 (file)
@@ -35,7 +35,7 @@
 #define        EINT7   67      /* EDGE Port interrupt 7 */
 
 static unsigned int irqebitmap[] = { 0, 1, 4, 7 };
-static unsigned int inline irq2ebit(unsigned int irq)
+static inline unsigned int irq2ebit(unsigned int irq)
 {
        return irqebitmap[irq - EINT0];
 }
@@ -51,7 +51,7 @@ static unsigned int inline irq2ebit(unsigned int irq)
 #define        EINT1   65      /* EDGE Port interrupt 1 */
 #define        EINT7   71      /* EDGE Port interrupt 7 */
 
-static unsigned int inline irq2ebit(unsigned int irq)
+static inline unsigned int irq2ebit(unsigned int irq)
 {
        return irq - EINT0;
 }
index a1bdb1ea523479c62d4b3f38206910276339699d..39b9f311c4ef471a725fa0be8b0d3178ebd947ca 100644 (file)
@@ -116,7 +116,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
        pud_t *pud;
 
-       pud = (pud_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PUD_ORDER);
+       pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
        if (pud)
                pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
        return pud;
index 98a117a05fbce1af774dc0f6833e78163036e8ce..bab3d41e5987eea7c56af1e08563d65461eb61fb 100644 (file)
@@ -47,7 +47,7 @@ extern int __cpu_logical_map[NR_CPUS];
 /* Mask of CPUs which are currently definitely operating coherently */
 extern cpumask_t cpu_coherent_mask;
 
-extern void asmlinkage smp_bootstrap(void);
+extern asmlinkage void smp_bootstrap(void);
 
 extern void calculate_cpu_foreign_map(void);
 
index f3671cbbc1177c804613eb68818a4b8c9e66ca5a..b05627597b1ba251831d393f1b4b672dfd8c61eb 100644 (file)
@@ -11,4 +11,6 @@
 #ifndef _ASM_NMI_H
 #define _ASM_NMI_H
 
+extern void arch_touch_nmi_watchdog(void);
+
 #endif /* _ASM_NMI_H */
index f2f5c9cfaabd803e02b95fad30267e1cde8a35c3..34f8773de7d0379408be6c49800b921a47b2a525 100644 (file)
@@ -50,9 +50,9 @@ watchdog_handler:
 #   we can't inline it)
 #
 ###############################################################################
-       .globl  touch_nmi_watchdog
-       .type   touch_nmi_watchdog,@function
-touch_nmi_watchdog:
+       .globl  arch_touch_nmi_watchdog
+       .type   arch_touch_nmi_watchdog,@function
+arch_touch_nmi_watchdog:
        clr     d0
        clr     d1
        mov     watchdog_alert_counter, a0
@@ -63,4 +63,4 @@ touch_nmi_watchdog:
        lne
        ret     [],0
 
-       .size   touch_nmi_watchdog,.-touch_nmi_watchdog
+       .size   arch_touch_nmi_watchdog,.-arch_touch_nmi_watchdog
index a2d8e6938d6716bca53162a00775291bca58d227..0d5641beadf5cecfe37addd4aa569acdf2be8fc5 100644 (file)
@@ -31,7 +31,7 @@ static unsigned int watchdog;
 static unsigned int watchdog_hz = 1;
 unsigned int watchdog_alert_counter[NR_CPUS];
 
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 /*
  * the best way to detect whether a CPU has a 'hard lockup' problem
index 7177a3f4f41815dffbb0976c8442a85d52a8b22d..36f858c37ca70b576e851a52ada48e2400de86a1 100644 (file)
@@ -82,7 +82,7 @@ config NR_IRQS
 
 config NMI_IPI
        bool
-       depends on SMP && (DEBUGGER || KEXEC_CORE)
+       depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR)
        default y
 
 config STACKTRACE_SUPPORT
@@ -125,6 +125,7 @@ config PPC
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_DMA_SET_COHERENT_MASK
        select ARCH_HAS_ELF_RANDOMIZE
+       select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE
        select ARCH_HAS_SG_CHAIN
@@ -192,11 +193,13 @@ config PPC
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_MOD_ARCH_SPECIFIC
-       select HAVE_NMI                         if PERF_EVENTS
+       select HAVE_NMI                         if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
+       select HAVE_HARDLOCKUP_DETECTOR_ARCH    if (PPC64 && PPC_BOOK3S)
        select HAVE_OPROFILE
        select HAVE_OPTPROBES                   if PPC64
        select HAVE_PERF_EVENTS
        select HAVE_PERF_EVENTS_NMI             if PPC64
+       select HAVE_HARDLOCKUP_DETECTOR_PERF    if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
        select HAVE_PERF_REGS
        select HAVE_PERF_USER_STACK_DUMP
        select HAVE_RCU_TABLE_FREE              if SMP
index 20b1485ff1e885254b032a2dcbe017fb9566baf2..e2329db9d6f405fe1d32ebfb8f95035cf1ecedcd 100644 (file)
@@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
        return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
 #else
        struct page *page;
-       page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT),
+       page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_RETRY_MAYFAIL),
                                4);
        if (!page)
                return NULL;
index ff1ccb375e6063806941e6186b737a586279686c..6f8e79cd35d87f9eababcbbbba23c3d7f0f98f87 100644 (file)
@@ -1,4 +1,15 @@
 #ifndef _ASM_NMI_H
 #define _ASM_NMI_H
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+extern void arch_touch_nmi_watchdog(void);
+
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+                                          bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
+#else
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+
 #endif /* _ASM_NMI_H */
index ebddb2111d870cebec83b578c1e627c5300701a8..8ea98504f90078e6ea0592531ef4b482f63ef4d3 100644 (file)
@@ -55,6 +55,8 @@ struct smp_ops_t {
        int   (*cpu_bootable)(unsigned int nr);
 };
 
+extern void smp_flush_nmi_ipi(u64 delay_us);
+extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
 extern void smp_send_debugger_break(void);
 extern void start_secondary_resume(void);
 extern void smp_generic_give_timebase(void);
index 0845eebc5af382be631a09bb16b6c9f2b5570caa..4aa7c147e4472257564200a8a55a742c78989023 100644 (file)
@@ -38,6 +38,7 @@ obj-$(CONFIG_PPC64)           += setup_64.o sys_ppc32.o \
                                   signal_64.o ptrace32.o \
                                   paca.o nvram_64.o firmware.o
 obj-$(CONFIG_VDSO32)           += vdso32/
+obj-$(CONFIG_HARDLOCKUP_DETECTOR)      += watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_power.o
index 4c18a5fbb4bbf52dd61fc02d3953ae55a56a25f1..e6d8354d79ef25a34ece7766683813a65ffde57c 100644 (file)
@@ -1314,6 +1314,31 @@ EXC_REAL_NONE(0x1800, 0x100)
 EXC_VIRT_NONE(0x5800, 0x100)
 #endif
 
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH)
+
+#define MASKED_DEC_HANDLER_LABEL 3f
+
+#define MASKED_DEC_HANDLER(_H)                         \
+3: /* soft-nmi */                                      \
+       std     r12,PACA_EXGEN+EX_R12(r13);             \
+       GET_SCRATCH0(r10);                              \
+       std     r10,PACA_EXGEN+EX_R13(r13);             \
+       EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
+
+EXC_COMMON_BEGIN(soft_nmi_common)
+       mr      r10,r1
+       ld      r1,PACAEMERGSP(r13)
+       ld      r1,PACA_NMI_EMERG_SP(r13)
+       subi    r1,r1,INT_FRAME_SIZE
+       EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
+                       system_reset, soft_nmi_interrupt,
+                       ADD_NVGPRS;ADD_RECONCILE)
+       b       ret_from_except
+
+#else
+#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
+#define MASKED_DEC_HANDLER(_H)
+#endif
 
 /*
  * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -1336,7 +1361,7 @@ masked_##_H##interrupt:                                   \
        lis     r10,0x7fff;                             \
        ori     r10,r10,0xffff;                         \
        mtspr   SPRN_DEC,r10;                           \
-       b       2f;                                     \
+       b       MASKED_DEC_HANDLER_LABEL;               \
 1:     cmpwi   r10,PACA_IRQ_DBELL;                     \
        beq     2f;                                     \
        cmpwi   r10,PACA_IRQ_HMI;                       \
@@ -1351,7 +1376,8 @@ masked_##_H##interrupt:                                   \
        ld      r11,PACA_EXGEN+EX_R11(r13);             \
        GET_SCRATCH0(r13);                              \
        ##_H##rfid;                                     \
-       b       .
+       b       .;                                      \
+       MASKED_DEC_HANDLER(_H)
 
 /*
  * Real mode exceptions actually use this too, but alternate
index 3079518f2245c98f80efe3c9cd96952f2cc78d69..dc0c49cfd90a093aba02fc3f8931e87d687380a7 100644 (file)
@@ -999,8 +999,7 @@ static int fadump_create_elfcore_headers(char *bufp)
 
        phdr->p_paddr   = fadump_relocate(paddr_vmcoreinfo_note());
        phdr->p_offset  = phdr->p_paddr;
-       phdr->p_memsz   = vmcoreinfo_max_size;
-       phdr->p_filesz  = vmcoreinfo_max_size;
+       phdr->p_memsz   = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
 
        /* Increment number of program headers. */
        (elf->e_phnum)++;
index 9ad37f827a975f1f2a7f165447f951c9e2aa8fa7..1086ea37c83241a115378ae1de5971f6c004aaad 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/kvm_para.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/nmi.h> /* hardlockup_detector_disable() */
 
 #include <asm/reg.h>
 #include <asm/sections.h>
@@ -718,6 +719,12 @@ static __init void kvm_free_tmp(void)
 
 static int __init kvm_guest_init(void)
 {
+       /*
+        * The hardlockup detector is likely to get false positives in
+        * KVM guests, so disable it by default.
+        */
+       hardlockup_detector_disable();
+
        if (!kvm_para_available())
                goto free_tmp;
 
index dd8a04f3053a71dd811c03cb1c668d1a61d35063..613f79f03877d725fd09305072e31938ae33fe3a 100644 (file)
@@ -15,6 +15,9 @@
 
 #undef DEBUG_PROM
 
+/* we cannot use FORTIFY as it brings in new symbols */
+#define __NO_FORTIFY
+
 #include <stdarg.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
index 4640f6d64f8b406a636d60c4ea2263658dde5be6..af23d4b576ec5ba40326b46070869ac6fa861a8d 100644 (file)
@@ -751,22 +751,3 @@ unsigned long memory_block_size_bytes(void)
 struct ppc_pci_io ppc_pci_io;
 EXPORT_SYMBOL(ppc_pci_io);
 #endif
-
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-u64 hw_nmi_get_sample_period(int watchdog_thresh)
-{
-       return ppc_proc_freq * watchdog_thresh;
-}
-
-/*
- * The hardlockup detector breaks PMU event based branches and is likely
- * to get false positives in KVM guests, so disable it by default.
- */
-static int __init disable_hardlockup_detector(void)
-{
-       hardlockup_detector_disable();
-
-       return 0;
-}
-early_initcall(disable_hardlockup_detector);
-#endif
index c6b8bace1766934047d772dd00e839d121053980..997c88d54acf292b3e80beef1791ee2194ca9071 100644 (file)
@@ -435,13 +435,31 @@ static void do_smp_send_nmi_ipi(int cpu)
        }
 }
 
+void smp_flush_nmi_ipi(u64 delay_us)
+{
+       unsigned long flags;
+
+       nmi_ipi_lock_start(&flags);
+       while (nmi_ipi_busy_count) {
+               nmi_ipi_unlock_end(&flags);
+               udelay(1);
+               if (delay_us) {
+                       delay_us--;
+                       if (!delay_us)
+                               return;
+               }
+               nmi_ipi_lock_start(&flags);
+       }
+       nmi_ipi_unlock_end(&flags);
+}
+
 /*
  * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
  * - fn is the target callback function.
  * - delay_us > 0 is the delay before giving up waiting for targets to
  *   enter the handler, == 0 specifies indefinite delay.
  */
-static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
 {
        unsigned long flags;
        int me = raw_smp_processor_id();
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
new file mode 100644 (file)
index 0000000..b67f8b0
--- /dev/null
@@ -0,0 +1,386 @@
+/*
+ * Watchdog support on powerpc systems.
+ *
+ * Copyright 2017, IBM Corporation.
+ *
+ * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
+ */
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/kdebug.h>
+#include <linux/sched/debug.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+
+#include <asm/paca.h>
+
+/*
+ * The watchdog has a simple timer that runs on each CPU, once per timer
+ * period. This is the heartbeat.
+ *
+ * Then there are checks to see if the heartbeat has not triggered on a CPU
+ * for the panic timeout period. Currently the watchdog only supports an
+ * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ *
+ * This is not an NMI watchdog, but Linux uses that name for a generic
+ * watchdog in some cases, so NMI gets used in some places.
+ */
+
+static cpumask_t wd_cpus_enabled __read_mostly;
+
+static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
+static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
+
+static u64 wd_timer_period_ms __read_mostly;  /* interval between heartbeat */
+
+static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(u64, wd_timer_tb);
+
+/*
+ * These are for the SMP checker. CPUs clear their pending bit in their
+ * heartbeat. If the bitmask becomes empty, the time is noted and the
+ * bitmask is refilled.
+ *
+ * All CPUs clear their bit in the pending mask every timer period.
+ * Once all have cleared, the time is noted and the bits are reset.
+ * If the time since all clear was greater than the panic timeout,
+ * we can panic with the list of stuck CPUs.
+ *
+ * This will work best with NMI IPIs for crash code so the stuck CPUs
+ * can be pulled out to get their backtraces.
+ */
+static unsigned long __wd_smp_lock;
+static cpumask_t wd_smp_cpus_pending;
+static cpumask_t wd_smp_cpus_stuck;
+static u64 wd_smp_last_reset_tb;
+
+static inline void wd_smp_lock(unsigned long *flags)
+{
+       /*
+        * Avoid locking layers if possible.
+        * This may be called from low level interrupt handlers at some
+        * point in future.
+        */
+       local_irq_save(*flags);
+       while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
+               cpu_relax();
+}
+
+static inline void wd_smp_unlock(unsigned long *flags)
+{
+       clear_bit_unlock(0, &__wd_smp_lock);
+       local_irq_restore(*flags);
+}
+
+static void wd_lockup_ipi(struct pt_regs *regs)
+{
+       pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id());
+       print_modules();
+       print_irqtrace_events(current);
+       if (regs)
+               show_regs(regs);
+       else
+               dump_stack();
+
+       if (hardlockup_panic)
+               nmi_panic(regs, "Hard LOCKUP");
+}
+
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+       cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+       cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+       if (cpumask_empty(&wd_smp_cpus_pending)) {
+               wd_smp_last_reset_tb = tb;
+               cpumask_andnot(&wd_smp_cpus_pending,
+                               &wd_cpus_enabled,
+                               &wd_smp_cpus_stuck);
+       }
+}
+
+static void watchdog_smp_panic(int cpu, u64 tb)
+{
+       unsigned long flags;
+       int c;
+
+       wd_smp_lock(&flags);
+       /* Double check some things under lock */
+       if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
+               goto out;
+       if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
+               goto out;
+       if (cpumask_weight(&wd_smp_cpus_pending) == 0)
+               goto out;
+
+       pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
+                       cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+
+       /*
+        * Try to trigger the stuck CPUs.
+        */
+       for_each_cpu(c, &wd_smp_cpus_pending) {
+               if (c == cpu)
+                       continue;
+               smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+       }
+       smp_flush_nmi_ipi(1000000);
+
+       /* Take the stuck CPU out of the watch group */
+       for_each_cpu(c, &wd_smp_cpus_pending)
+               set_cpu_stuck(c, tb);
+
+out:
+       wd_smp_unlock(&flags);
+
+       printk_safe_flush();
+       /*
+        * printk_safe_flush() seems to require another print
+        * before anything actually goes out to console.
+        */
+       if (sysctl_hardlockup_all_cpu_backtrace)
+               trigger_allbutself_cpu_backtrace();
+
+       if (hardlockup_panic)
+               nmi_panic(NULL, "Hard LOCKUP");
+}
+
+static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+{
+       if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
+               if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
+                       unsigned long flags;
+
+                       pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
+                       wd_smp_lock(&flags);
+                       cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
+                       wd_smp_unlock(&flags);
+               }
+               return;
+       }
+       cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+       if (cpumask_empty(&wd_smp_cpus_pending)) {
+               unsigned long flags;
+
+               wd_smp_lock(&flags);
+               if (cpumask_empty(&wd_smp_cpus_pending)) {
+                       wd_smp_last_reset_tb = tb;
+                       cpumask_andnot(&wd_smp_cpus_pending,
+                                       &wd_cpus_enabled,
+                                       &wd_smp_cpus_stuck);
+               }
+               wd_smp_unlock(&flags);
+       }
+}
+
+static void watchdog_timer_interrupt(int cpu)
+{
+       u64 tb = get_tb();
+
+       per_cpu(wd_timer_tb, cpu) = tb;
+
+       wd_smp_clear_cpu_pending(cpu, tb);
+
+       if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
+               watchdog_smp_panic(cpu, tb);
+}
+
+void soft_nmi_interrupt(struct pt_regs *regs)
+{
+       unsigned long flags;
+       int cpu = raw_smp_processor_id();
+       u64 tb;
+
+       if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+               return;
+
+       nmi_enter();
+       tb = get_tb();
+       if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+               per_cpu(wd_timer_tb, cpu) = tb;
+
+               wd_smp_lock(&flags);
+               if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
+                       wd_smp_unlock(&flags);
+                       goto out;
+               }
+               set_cpu_stuck(cpu, tb);
+
+               pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
+               print_modules();
+               print_irqtrace_events(current);
+               if (regs)
+                       show_regs(regs);
+               else
+                       dump_stack();
+
+               wd_smp_unlock(&flags);
+
+               if (sysctl_hardlockup_all_cpu_backtrace)
+                       trigger_allbutself_cpu_backtrace();
+
+               if (hardlockup_panic)
+                       nmi_panic(regs, "Hard LOCKUP");
+       }
+       if (wd_panic_timeout_tb < 0x7fffffff)
+               mtspr(SPRN_DEC, wd_panic_timeout_tb);
+
+out:
+       nmi_exit();
+}
+
+static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
+{
+       t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
+       if (wd_timer_period_ms > 1000)
+               t->expires = __round_jiffies_up(t->expires, cpu);
+       add_timer_on(t, cpu);
+}
+
+static void wd_timer_fn(unsigned long data)
+{
+       struct timer_list *t = this_cpu_ptr(&wd_timer);
+       int cpu = smp_processor_id();
+
+       watchdog_timer_interrupt(cpu);
+
+       wd_timer_reset(cpu, t);
+}
+
+void arch_touch_nmi_watchdog(void)
+{
+       int cpu = smp_processor_id();
+
+       watchdog_timer_interrupt(cpu);
+}
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+
+static void start_watchdog_timer_on(unsigned int cpu)
+{
+       struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
+
+       per_cpu(wd_timer_tb, cpu) = get_tb();
+
+       setup_pinned_timer(t, wd_timer_fn, 0);
+       wd_timer_reset(cpu, t);
+}
+
+static void stop_watchdog_timer_on(unsigned int cpu)
+{
+       struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
+
+       del_timer_sync(t);
+}
+
+static int start_wd_on_cpu(unsigned int cpu)
+{
+       if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
+               WARN_ON(1);
+               return 0;
+       }
+
+       if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+               return 0;
+
+       if (watchdog_suspended)
+               return 0;
+
+       if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+               return 0;
+
+       cpumask_set_cpu(cpu, &wd_cpus_enabled);
+       if (cpumask_weight(&wd_cpus_enabled) == 1) {
+               cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
+               wd_smp_last_reset_tb = get_tb();
+       }
+       smp_wmb();
+       start_watchdog_timer_on(cpu);
+
+       return 0;
+}
+
+static int stop_wd_on_cpu(unsigned int cpu)
+{
+       if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+               return 0; /* Can happen in CPU unplug case */
+
+       stop_watchdog_timer_on(cpu);
+
+       cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+       wd_smp_clear_cpu_pending(cpu, get_tb());
+
+       return 0;
+}
+
+static void watchdog_calc_timeouts(void)
+{
+       wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq;
+
+       /* Have the SMP detector trigger a bit later */
+       wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
+
+       /* 2/5 is the factor that the perf based detector uses */
+       wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
+}
+
+void watchdog_nmi_reconfigure(void)
+{
+       int cpu;
+
+       watchdog_calc_timeouts();
+
+       for_each_cpu(cpu, &wd_cpus_enabled)
+               stop_wd_on_cpu(cpu);
+
+       for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
+               start_wd_on_cpu(cpu);
+}
+
+/*
+ * This runs after lockup_detector_init() which sets up watchdog_cpumask.
+ */
+static int __init powerpc_watchdog_init(void)
+{
+       int err;
+
+       watchdog_calc_timeouts();
+
+       err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
+                               start_wd_on_cpu, stop_wd_on_cpu);
+       if (err < 0)
+               pr_warn("Watchdog could not be initialized");
+
+       return 0;
+}
+arch_initcall(powerpc_watchdog_init);
+
+static void handle_backtrace_ipi(struct pt_regs *regs)
+{
+       nmi_cpu_backtrace(regs);
+}
+
+static void raise_backtrace_ipi(cpumask_t *mask)
+{
+       unsigned int cpu;
+
+       for_each_cpu(cpu, mask) {
+               if (cpu == smp_processor_id())
+                       handle_backtrace_ipi(NULL);
+               else
+                       smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000);
+       }
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+       nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
+}
index 710e491206ed0a11ff96b4994d8dbb91f264b94a..8cb0190e2a737aab903d350bb6d79422a8c37ac5 100644 (file)
@@ -93,7 +93,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
        }
 
        if (!hpt)
-               hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
+               hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
                                       |__GFP_NOWARN, order - PAGE_SHIFT);
 
        if (!hpt)
index f3917705c686cb3d9af359dd741a0ab0e40a615f..41cf5ae273cf74a2747d13b9da2274b8eb2054cb 100644 (file)
@@ -233,192 +233,192 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p)
 
 static void test_basic_patching(void)
 {
-       extern unsigned int ftr_fixup_test1;
-       extern unsigned int end_ftr_fixup_test1;
-       extern unsigned int ftr_fixup_test1_orig;
-       extern unsigned int ftr_fixup_test1_expected;
-       int size = &end_ftr_fixup_test1 - &ftr_fixup_test1;
+       extern unsigned int ftr_fixup_test1[];
+       extern unsigned int end_ftr_fixup_test1[];
+       extern unsigned int ftr_fixup_test1_orig[];
+       extern unsigned int ftr_fixup_test1_expected[];
+       int size = end_ftr_fixup_test1 - ftr_fixup_test1;
 
        fixup.value = fixup.mask = 8;
-       fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1);
-       fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2);
+       fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
+       fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2);
        fixup.alt_start_off = fixup.alt_end_off = 0;
 
        /* Sanity check */
-       check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+       check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
 
        /* Check we don't patch if the value matches */
        patch_feature_section(8, &fixup);
-       check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+       check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
 
        /* Check we do patch if the value doesn't match */
        patch_feature_section(0, &fixup);
-       check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+       check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
 
        /* Check we do patch if the mask doesn't match */
-       memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size);
-       check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+       memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size);
+       check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
        patch_feature_section(~8, &fixup);
-       check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+       check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
 }
 
 static void test_alternative_patching(void)
 {
-       extern unsigned int ftr_fixup_test2;
-       extern unsigned int end_ftr_fixup_test2;
-       extern unsigned int ftr_fixup_test2_orig;
-       extern unsigned int ftr_fixup_test2_alt;
-       extern unsigned int ftr_fixup_test2_expected;
-       int size = &end_ftr_fixup_test2 - &ftr_fixup_test2;
+       extern unsigned int ftr_fixup_test2[];
+       extern unsigned int end_ftr_fixup_test2[];
+       extern unsigned int ftr_fixup_test2_orig[];
+       extern unsigned int ftr_fixup_test2_alt[];
+       extern unsigned int ftr_fixup_test2_expected[];
+       int size = end_ftr_fixup_test2 - ftr_fixup_test2;
 
        fixup.value = fixup.mask = 0xF;
-       fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1);
-       fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2);
-       fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt);
-       fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1);
+       fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
+       fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2);
+       fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt);
+       fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1);
 
        /* Sanity check */
-       check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+       check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
 
        /* Check we don't patch if the value matches */
        patch_feature_section(0xF, &fixup);
-       check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+       check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
 
        /* Check we do patch if the value doesn't match */
        patch_feature_section(0, &fixup);
-       check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+       check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
 
        /* Check we do patch if the mask doesn't match */
-       memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size);
-       check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+       memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size);
+       check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
        patch_feature_section(~0xF, &fixup);
-       check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+       check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
 }
 
 static void test_alternative_case_too_big(void)
 {
-       extern unsigned int ftr_fixup_test3;
-       extern unsigned int end_ftr_fixup_test3;
-       extern unsigned int ftr_fixup_test3_orig;
-       extern unsigned int ftr_fixup_test3_alt;
-       int size = &end_ftr_fixup_test3 - &ftr_fixup_test3;
+       extern unsigned int ftr_fixup_test3[];
+       extern unsigned int end_ftr_fixup_test3[];
+       extern unsigned int ftr_fixup_test3_orig[];
+       extern unsigned int ftr_fixup_test3_alt[];
+       int size = end_ftr_fixup_test3 - ftr_fixup_test3;
 
        fixup.value = fixup.mask = 0xC;
-       fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1);
-       fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2);
-       fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt);
-       fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2);
+       fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
+       fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2);
+       fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt);
+       fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2);
 
        /* Sanity check */
-       check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+       check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 
        /* Expect nothing to be patched, and the error returned to us */
        check(patch_feature_section(0xF, &fixup) == 1);
-       check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+       check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
        check(patch_feature_section(0, &fixup) == 1);
-       check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+       check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
        check(patch_feature_section(~0xF, &fixup) == 1);
-       check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+       check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 }
 
 static void test_alternative_case_too_small(void)
 {
-       extern unsigned int ftr_fixup_test4;
-       extern unsigned int end_ftr_fixup_test4;
-       extern unsigned int ftr_fixup_test4_orig;
-       extern unsigned int ftr_fixup_test4_alt;
-       extern unsigned int ftr_fixup_test4_expected;
-       int size = &end_ftr_fixup_test4 - &ftr_fixup_test4;
+       extern unsigned int ftr_fixup_test4[];
+       extern unsigned int end_ftr_fixup_test4[];
+       extern unsigned int ftr_fixup_test4_orig[];
+       extern unsigned int ftr_fixup_test4_alt[];
+       extern unsigned int ftr_fixup_test4_expected[];
+       int size = end_ftr_fixup_test4 - ftr_fixup_test4;
        unsigned long flag;
 
        /* Check a high-bit flag */
        flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
        fixup.value = fixup.mask = flag;
-       fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1);
-       fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5);
-       fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt);
-       fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2);
+       fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1);
+       fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5);
+       fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt);
+       fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2);
 
        /* Sanity check */
-       check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+       check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
 
        /* Check we don't patch if the value matches */
        patch_feature_section(flag, &fixup);
-       check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+       check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
 
        /* Check we do patch if the value doesn't match */
        patch_feature_section(0, &fixup);
-       check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+       check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
 
        /* Check we do patch if the mask doesn't match */
-       memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size);
-       check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+       memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size);
+       check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
        patch_feature_section(~flag, &fixup);
-       check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+       check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
 }
 
 static void test_alternative_case_with_branch(void)
 {
-       extern unsigned int ftr_fixup_test5;
-       extern unsigned int end_ftr_fixup_test5;
-       extern unsigned int ftr_fixup_test5_expected;
-       int size = &end_ftr_fixup_test5 - &ftr_fixup_test5;
+       extern unsigned int ftr_fixup_test5[];
+       extern unsigned int end_ftr_fixup_test5[];
+       extern unsigned int ftr_fixup_test5_expected[];
+       int size = end_ftr_fixup_test5 - ftr_fixup_test5;
 
-       check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0);
+       check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
 }
 
 static void test_alternative_case_with_external_branch(void)
 {
-       extern unsigned int ftr_fixup_test6;
-       extern unsigned int end_ftr_fixup_test6;
-       extern unsigned int ftr_fixup_test6_expected;
-       int size = &end_ftr_fixup_test6 - &ftr_fixup_test6;
+       extern unsigned int ftr_fixup_test6[];
+       extern unsigned int end_ftr_fixup_test6[];
+       extern unsigned int ftr_fixup_test6_expected[];
+       int size = end_ftr_fixup_test6 - ftr_fixup_test6;
 
-       check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0);
+       check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
 }
 
 static void test_cpu_macros(void)
 {
-       extern u8 ftr_fixup_test_FTR_macros;
-       extern u8 ftr_fixup_test_FTR_macros_expected;
-       unsigned long size = &ftr_fixup_test_FTR_macros_expected -
-                            &ftr_fixup_test_FTR_macros;
+       extern u8 ftr_fixup_test_FTR_macros[];
+       extern u8 ftr_fixup_test_FTR_macros_expected[];
+       unsigned long size = ftr_fixup_test_FTR_macros_expected -
+                            ftr_fixup_test_FTR_macros;
 
        /* The fixups have already been done for us during boot */
-       check(memcmp(&ftr_fixup_test_FTR_macros,
-                    &ftr_fixup_test_FTR_macros_expected, size) == 0);
+       check(memcmp(ftr_fixup_test_FTR_macros,
+                    ftr_fixup_test_FTR_macros_expected, size) == 0);
 }
 
 static void test_fw_macros(void)
 {
 #ifdef CONFIG_PPC64
-       extern u8 ftr_fixup_test_FW_FTR_macros;
-       extern u8 ftr_fixup_test_FW_FTR_macros_expected;
-       unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected -
-                            &ftr_fixup_test_FW_FTR_macros;
+       extern u8 ftr_fixup_test_FW_FTR_macros[];
+       extern u8 ftr_fixup_test_FW_FTR_macros_expected[];
+       unsigned long size = ftr_fixup_test_FW_FTR_macros_expected -
+                            ftr_fixup_test_FW_FTR_macros;
 
        /* The fixups have already been done for us during boot */
-       check(memcmp(&ftr_fixup_test_FW_FTR_macros,
-                    &ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
+       check(memcmp(ftr_fixup_test_FW_FTR_macros,
+                    ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
 #endif
 }
 
 static void test_lwsync_macros(void)
 {
-       extern u8 lwsync_fixup_test;
-       extern u8 end_lwsync_fixup_test;
-       extern u8 lwsync_fixup_test_expected_LWSYNC;
-       extern u8 lwsync_fixup_test_expected_SYNC;
-       unsigned long size = &end_lwsync_fixup_test -
-                            &lwsync_fixup_test;
+       extern u8 lwsync_fixup_test[];
+       extern u8 end_lwsync_fixup_test[];
+       extern u8 lwsync_fixup_test_expected_LWSYNC[];
+       extern u8 lwsync_fixup_test_expected_SYNC[];
+       unsigned long size = end_lwsync_fixup_test -
+                            lwsync_fixup_test;
 
        /* The fixups have already been done for us during boot */
        if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
-               check(memcmp(&lwsync_fixup_test,
-                            &lwsync_fixup_test_expected_LWSYNC, size) == 0);
+               check(memcmp(lwsync_fixup_test,
+                            lwsync_fixup_test_expected_LWSYNC, size) == 0);
        } else {
-               check(memcmp(&lwsync_fixup_test,
-                            &lwsync_fixup_test_expected_SYNC, size) == 0);
+               check(memcmp(lwsync_fixup_test,
+                            lwsync_fixup_test_expected_SYNC, size) == 0);
        }
 }
 
index 0ee6be4f1ba44e231da18c7e38d217610ac4bcd5..5d78b193fec4142ecd3d4d5788dfe686cf2e8e6c 100644 (file)
 /*
  * Top of mmap area (just below the process stack).
  *
- * Leave at least a ~128 MB hole on 32bit applications.
- *
- * On 64bit applications we randomise the stack by 1GB so we need to
- * space our mmap start address by a further 1GB, otherwise there is a
- * chance the mmap area will end up closer to the stack than our ulimit
- * requires.
+ * Leave at least a ~128 MB hole.
  */
-#define MIN_GAP32 (128*1024*1024)
-#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
-#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
+#define MIN_GAP (128*1024*1024)
 #define MAX_GAP (TASK_SIZE/6*5)
 
 static inline int mmap_is_legacy(void)
@@ -71,9 +64,26 @@ unsigned long arch_mmap_rnd(void)
        return rnd << PAGE_SHIFT;
 }
 
+static inline unsigned long stack_maxrandom_size(void)
+{
+       if (!(current->flags & PF_RANDOMIZE))
+               return 0;
+
+       /* 8MB for 32bit, 1GB for 64bit */
+       if (is_32bit_task())
+               return (1<<23);
+       else
+               return (1<<30);
+}
+
 static inline unsigned long mmap_base(unsigned long rnd)
 {
        unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
+
+       /* Values close to RLIM_INFINITY can overflow. */
+       if (gap + pad > gap)
+               gap += pad;
 
        if (gap < MIN_GAP)
                gap = MIN_GAP;
index 49a6bd45957b53aaee9a4ad387725c06ca299cac..3d0b14afa23258efa6047baa0954e1731fb166bb 100644 (file)
@@ -246,6 +246,7 @@ void arch_crash_save_vmcoreinfo(void)
        VMCOREINFO_SYMBOL(lowcore_ptr);
        VMCOREINFO_SYMBOL(high_memory);
        VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+       mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
 }
 
 void machine_shutdown(void)
index 3ae756c0db3de276936283a5c3577fa264ba2aa9..3d1d808ea8a97bb31be7dee6a7c06e16b3f56ad6 100644 (file)
@@ -496,11 +496,6 @@ static void __init setup_memory_end(void)
        pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
 }
 
-static void __init setup_vmcoreinfo(void)
-{
-       mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
-}
-
 #ifdef CONFIG_CRASH_DUMP
 
 /*
@@ -939,7 +934,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        setup_resources();
-       setup_vmcoreinfo();
        setup_lowcore();
        smp_fill_possible_mask();
        cpu_detect_mhz_feature();
index 1b77f068be2b1d027ae460ce0e6919a84223a1f1..c9828f785ca0bce1f3e49a73def335bfc67eafc7 100644 (file)
@@ -48,6 +48,7 @@ do {                                                  \
                   "i" (__FILE__),                      \
                   "i" (__LINE__), "i" (0),             \
                   "i" (sizeof(struct bug_entry)));     \
+       unreachable();                                  \
 } while (0)
 
 #define __WARN_FLAGS(flags)                            \
index d9df3a76847c32b36ea2ba8d8318cba486643c64..141515a43b78121045d503ab110f385116013f57 100644 (file)
@@ -19,6 +19,7 @@ static __always_inline void boot_init_stack_canary(void)
        /* Try to get a semi random initial value. */
        get_random_bytes(&canary, sizeof(canary));
        canary ^= LINUX_VERSION_CODE;
+       canary &= CANARY_MASK;
 
        current->stack_canary = canary;
        __stack_chk_guard = current->stack_canary;
index d94dadedf74f57d5ad8d34d28c8c4cc3201745d1..445b5e69b73cb3954ff195d587f38e89dcea6167 100644 (file)
@@ -234,7 +234,7 @@ static void sh64_icache_inv_current_user_range(unsigned long start, unsigned lon
 #define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
 static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
 
-static void inline sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
+static inline void sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
 {
        /* Purge all ways in a particular block of sets, specified by the base
           set number and number of sets.  Can handle wrap-around, if that's
index 26ad2b2607c66304c6a76e74293a83b32a373dd4..284eac3ffaf2cdbb4b281ad070ba6328eff7c9d0 100644 (file)
@@ -7,6 +7,7 @@ void nmi_adjust_hz(unsigned int new_hz);
 
 extern atomic_t nmi_active;
 
+void arch_touch_nmi_watchdog(void);
 void start_nmi_watchdog(void *unused);
 void stop_nmi_watchdog(void *unused);
 
index e4b4e790bf899cb295bb7add3275140df55da17c..fa466ce45bc9bf115a488aaa95279922efc96bc4 100644 (file)
@@ -205,7 +205,7 @@ static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
        handle_size = (sizeof(struct mdesc_handle) -
                       sizeof(struct mdesc_hdr) +
                       mdesc_size);
-       base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_REPEAT);
+       base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
        if (!base)
                return NULL;
 
index 95e73c63c99d43b257ec511c14ae894322fb9349..048ad783ea3fe45adfa5ce23d48a106910e02de4 100644 (file)
@@ -51,7 +51,7 @@ static DEFINE_PER_CPU(unsigned int, last_irq_sum);
 static DEFINE_PER_CPU(long, alert_counter);
 static DEFINE_PER_CPU(int, nmi_touch);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
        if (atomic_read(&nmi_active)) {
                int cpu;
@@ -61,10 +61,8 @@ void touch_nmi_watchdog(void)
                                per_cpu(nmi_touch, cpu) = 1;
                }
        }
-
-       touch_softlockup_watchdog();
 }
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
 {
index 94a18681353d5767ca8288a8a3e019dd4457ef49..781521b7cf9ef6b6766dac9249a3568f257c4fa8 100644 (file)
@@ -50,6 +50,7 @@ config X86
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FAST_MULTIPLIER
+       select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_KCOV                    if X86_64
        select ARCH_HAS_MMIO_FLUSH
@@ -162,6 +163,7 @@ config X86
        select HAVE_PCSPKR_PLATFORM
        select HAVE_PERF_EVENTS
        select HAVE_PERF_EVENTS_NMI
+       select HAVE_HARDLOCKUP_DETECTOR_PERF    if HAVE_PERF_EVENTS_NMI
        select HAVE_PERF_REGS
        select HAVE_PERF_USER_STACK_DUMP
        select HAVE_REGS_AND_STACK_ACCESS_API
index 00241c81552448c9a3877a16eb0183e2d81438ea..a0838ab929f2276de8ed08e2e0430f4e3e7943f8 100644 (file)
@@ -411,3 +411,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
        debug_putstr("done.\nBooting the kernel.\n");
        return output;
 }
+
+void fortify_panic(const char *name)
+{
+       error("detected buffer overflow");
+}
index d2ff779f347e5339d9dedf5261f7befac092f671..796ff6c1aa539ec6cb3843e501b0cbfa065182ae 100644 (file)
@@ -33,7 +33,7 @@
 
 #ifdef CONFIG_X86_32
 
-extern unsigned long asmlinkage efi_call_phys(void *, ...);
+extern asmlinkage unsigned long efi_call_phys(void *, ...);
 
 #define arch_efi_call_virt_setup()     kernel_fpu_begin()
 #define arch_efi_call_virt_teardown()  kernel_fpu_end()
@@ -52,7 +52,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 
 #define EFI_LOADER_SIGNATURE   "EL64"
 
-extern u64 asmlinkage efi_call(void *fp, ...);
+extern asmlinkage u64 efi_call(void *fp, ...);
 
 #define efi_call_phys(f, args...)              efi_call((f), args)
 
index dcbd9bcce71443eb64325051faa532a68bf50c00..8abedf1d650e9648f2a9864adf9ee815b000c0af 100644 (file)
@@ -74,6 +74,7 @@ static __always_inline void boot_init_stack_canary(void)
        get_random_bytes(&canary, sizeof(canary));
        tsc = rdtsc();
        canary += tsc + (tsc << 32UL);
+       canary &= CANARY_MASK;
 
        current->stack_canary = canary;
 #ifdef CONFIG_X86_64
index 3d3e8353ee5c09db9b83000efde37a9e34365690..e9ee84873de50a3b60a4cf8e978d4bf6eae9665f 100644 (file)
@@ -142,7 +142,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
 }
 
 #define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *, const void *, size_t);
 
+#ifndef CONFIG_FORTIFY_SOURCE
 #ifdef CONFIG_X86_USE_3DNOW
 
 #include <asm/mmx.h>
@@ -195,11 +197,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
 #endif
 
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMMOVE
 void *memmove(void *dest, const void *src, size_t n);
 
+extern int memcmp(const void *, const void *, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
 #define memcmp __builtin_memcmp
+#endif
 
 #define __HAVE_ARCH_MEMCHR
 extern void *memchr(const void *cs, int c, size_t count);
@@ -321,6 +327,8 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
         : __memset_generic((s), (c), (count)))
 
 #define __HAVE_ARCH_MEMSET
+extern void *memset(void *, int, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
 #if (__GNUC__ >= 4)
 #define memset(s, c, count) __builtin_memset(s, c, count)
 #else
@@ -330,6 +338,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
                                 (count))                               \
         : __memset((s), (c), (count)))
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 /*
  * find the first occurrence of byte 'c', or 1 past the area if none
index 1f22bc277c455a2181551e5d304b80d0cf8d1c8f..2a8c822de1fc695c3bc18768c51494632043d6fe 100644 (file)
@@ -31,6 +31,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
 extern void *memcpy(void *to, const void *from, size_t len);
 extern void *__memcpy(void *to, const void *from, size_t len);
 
+#ifndef CONFIG_FORTIFY_SOURCE
 #ifndef CONFIG_KMEMCHECK
 #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
 #define memcpy(dst, src, len)                                  \
@@ -51,6 +52,7 @@ extern void *__memcpy(void *to, const void *from, size_t len);
  */
 #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMSET
 void *memset(void *s, int c, size_t n);
@@ -77,6 +79,11 @@ int strcmp(const char *cs, const char *ct);
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
 #define memmove(dst, src, len) __memmove(dst, src, len)
 #define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
 #endif
 
 #define __HAVE_ARCH_MEMCPY_MCSAFE 1
index c73c9fb281e18f7d058fd69497f504adeb3147c1..d6f387780849a218c44483a353ab13617ec3eedb 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
 {
        return (u64)(cpu_khz) * 1000 * watchdog_thresh;
index 22217ece26c8807a2747d8396a171c7a6b952842..44404e2307bbebaa9c1edd17db0cd4a07099462a 100644 (file)
@@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
        bufp += sizeof(Elf64_Phdr);
        phdr->p_type = PT_NOTE;
        phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
-       phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+       phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
        (ehdr->e_phnum)++;
 
 #ifdef CONFIG_X86_64
index cad12634d6bd8f5564b71bf7fc33d7354f149a9a..2eab7d0bfeddc6ddccee9c1318188418ebe0585e 100644 (file)
@@ -6,7 +6,7 @@
 
 __visible void *memcpy(void *to, const void *from, size_t n)
 {
-#ifdef CONFIG_X86_USE_3DNOW
+#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
        return __memcpy3d(to, from, n);
 #else
        return __memcpy(to, from, n);
index 797295e792b2fd237beec9f2354972420a83b01d..229d04a83f8561ec08e394f57d8cb5d671bd56ff 100644 (file)
@@ -92,13 +92,18 @@ unsigned long arch_mmap_rnd(void)
 static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
 {
        unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
        unsigned long gap_min, gap_max;
 
+       /* Values close to RLIM_INFINITY can overflow. */
+       if (gap + pad > gap)
+               gap += pad;
+
        /*
         * Top of mmap area (just below the process stack).
         * Leave an at least ~128 MB hole with possible stack randomization.
         */
-       gap_min = SIZE_128M + stack_maxrandom_size(task_size);
+       gap_min = SIZE_128M;
        gap_max = (task_size / 6) * 5;
 
        if (gap < gap_min)
index 1d7a7213a3109fb3f21bba9a7a6789496b19b346..cab28cf2cffbb78d09325c46413f65381ec6c5e3 100644 (file)
@@ -2693,8 +2693,8 @@ EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 phys_addr_t paddr_vmcoreinfo_note(void)
 {
        if (xen_pv_domain())
-               return virt_to_machine(&vmcoreinfo_note).maddr;
+               return virt_to_machine(vmcoreinfo_note).maddr;
        else
-               return __pa_symbol(&vmcoreinfo_note);
+               return __pa(vmcoreinfo_note);
 }
 #endif /* CONFIG_KEXEC_CORE */
index 01a260f67437488b425372c12d33142fce699f84..23cab7a8c1c19141726d419f518946c41d546df1 100644 (file)
@@ -987,6 +987,11 @@ void add_device_randomness(const void *buf, unsigned int size)
        unsigned long time = random_get_entropy() ^ jiffies;
        unsigned long flags;
 
+       if (!crng_ready()) {
+               crng_fast_load(buf, size);
+               return;
+       }
+
        trace_add_device_randomness(size, _RET_IP_);
        spin_lock_irqsave(&input_pool.lock, flags);
        _mix_pool_bytes(&input_pool, buf, size);
index f7425960f6a57d4d4048e905d4d6b101b333c114..37e24f525162e10d95e625953773783349ee1316 100644 (file)
@@ -17,6 +17,7 @@ cflags-$(CONFIG_ARM)          := $(subst -pg,,$(KBUILD_CFLAGS)) \
 cflags-$(CONFIG_EFI_ARMSTUB)   += -I$(srctree)/scripts/dtc/libfdt
 
 KBUILD_CFLAGS                  := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
+                                  -D__NO_FORTIFY \
                                   $(call cc-option,-ffreestanding) \
                                   $(call cc-option,-fno-stack-protector)
 
index 7dcac3bfb7719a44e7246291e843ee8b8433b76a..969bac8404f18cb31d4b22da8b0284d42f174541 100644 (file)
@@ -2434,8 +2434,9 @@ rebuild_st:
                                 * again with !__GFP_NORETRY. However, we still
                                 * want to fail this allocation rather than
                                 * trigger the out-of-memory killer and for
-                                * this we want the future __GFP_MAYFAIL.
+                                * this we want __GFP_RETRY_MAYFAIL.
                                 */
+                               gfp |= __GFP_RETRY_MAYFAIL;
                        }
                } while (1);
 
index 23039768f5416e63e6f9f367411e4642fa610356..be944d5aa9afc1c3d357653275d7c39883d801ec 100644 (file)
@@ -995,7 +995,9 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
        free_rd_atomic_resource(qp, res);
        rxe_advance_resp_resource(qp);
 
-       memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+       memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt));
+       memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0,
+              sizeof(skb->cb) - sizeof(ack_pkt));
 
        res->type = RXE_ATOMIC_MASK;
        res->atomic.skb = skb;
index e15a9733fcfdd9697269749c4901ed997aa4b0c1..9668616faf16bbd5ee81276bb0ce7457f452309e 100644 (file)
@@ -1386,7 +1386,7 @@ static void wbsd_request_dma(struct wbsd_host *host, int dma)
         * order for ISA to be able to DMA to it.
         */
        host->dma_buffer = kmalloc(WBSD_DMA_SIZE,
-               GFP_NOIO | GFP_DMA | __GFP_REPEAT | __GFP_NOWARN);
+               GFP_NOIO | GFP_DMA | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
        if (!host->dma_buffer)
                goto free;
 
index 65f5a794f26d030380142837969bff389ad3c127..98749fa817dac98863cc36afb0822c49018ea89e 100644 (file)
@@ -98,7 +98,7 @@ vmcp_write(struct file *file, const char __user *buff, size_t count,
        }
        if (!session->response)
                session->response = (char *)__get_free_pages(GFP_KERNEL
-                                               | __GFP_REPEAT | GFP_DMA,
+                                               | __GFP_RETRY_MAYFAIL | GFP_DMA,
                                                get_order(session->bufsize));
        if (!session->response) {
                mutex_unlock(&session->mutex);
index 1563b1458e44d437cfdf1731fd38120c2f125915..2ade6131a89f28045ae98e8b3de1b72c5f54edf2 100644 (file)
@@ -1115,7 +1115,7 @@ static const struct net_device_ops ctcm_mpc_netdev_ops = {
        .ndo_start_xmit         = ctcmpc_tx,
 };
 
-void static ctcm_dev_setup(struct net_device *dev)
+static void ctcm_dev_setup(struct net_device *dev)
 {
        dev->type = ARPHRD_SLIP;
        dev->tx_queue_len = 100;
index 3062cde33a3dbcc23988362c9776de7158eba147..8975cd32139047cf03afe9a93544cc919eaed661 100644 (file)
@@ -2408,7 +2408,7 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        return rc;
 }
 
-int inline qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
+inline int qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 {
        int cast_type = RTN_UNSPEC;
        struct neighbour *n = NULL;
index f1b3a46bdcaffaf8a301569ef7e328ad2c47087f..1bdc10651bcd118d89954042d79f221acd41366c 100644 (file)
@@ -252,7 +252,7 @@ int transport_alloc_session_tags(struct se_session *se_sess,
        int rc;
 
        se_sess->sess_cmd_map = kzalloc(tag_num * tag_size,
-                                       GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+                                       GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
        if (!se_sess->sess_cmd_map) {
                se_sess->sess_cmd_map = vzalloc(tag_num * tag_size);
                if (!se_sess->sess_cmd_map) {
index 2a61dd6b40094d48c2a39b84ffe028cf0f1379ff..906ee770ff4a89ec9d94a8282d4e3db0c71dc0c2 100644 (file)
@@ -377,7 +377,7 @@ static struct ioc3_port *get_ioc3_port(struct uart_port *the_port)
  *                     called per port from attach...
  * @port: port to initialize
  */
-static int inline port_init(struct ioc3_port *port)
+static inline int port_init(struct ioc3_port *port)
 {
        uint32_t sio_cr;
        struct port_hooks *hooks = port->ip_hooks;
@@ -1430,7 +1430,7 @@ static int receive_chars(struct uart_port *the_port)
  * @pending: interrupts to handle
  */
 
-static int inline
+static inline int
 ioc3uart_intr_one(struct ioc3_submodule *is,
                        struct ioc3_driver_data *idd,
                        unsigned int pending)
index f96bcf9bee2591258409b1477cc63a6b83266ec3..43d7d32eb15006b620a0443c9dc8a93ab01cc995 100644 (file)
@@ -824,7 +824,7 @@ pending_intrs(struct ioc4_soft *soft, int type)
  *                     called per port from attach...
  * @port: port to initialize
  */
-static int inline port_init(struct ioc4_port *port)
+static inline int port_init(struct ioc4_port *port)
 {
        uint32_t sio_cr;
        struct hooks *hooks = port->ip_hooks;
@@ -1048,7 +1048,7 @@ static irqreturn_t ioc4_intr(int irq, void *arg)
  *                     IOC4 with serial ports in the system.
  * @idd: Master module data for this IOC4
  */
-static int inline ioc4_attach_local(struct ioc4_driver_data *idd)
+static inline int ioc4_attach_local(struct ioc4_driver_data *idd)
 {
        struct ioc4_port *port;
        struct ioc4_port *ports[IOC4_NUM_SERIAL_PORTS];
index 8a069aa154eda461ae12807d2518bbfd0bb27bf4..27d7a701629877859e97bdc95d875d59c39552a2 100644 (file)
@@ -180,7 +180,7 @@ static const __u16 crc10_table[256] = {
  * Perform a memcpy and calculate fcs using ppp 10bit CRC algorithm. Return
  * new 10 bit FCS.
  */
-static __u16 __inline__ fcs_compute10(unsigned char *sp, int len, __u16 fcs)
+static inline __u16 fcs_compute10(unsigned char *sp, int len, __u16 fcs)
 {
        for (; len-- > 0; fcs = CRC10_FCS(fcs, *sp++));
        return fcs;
index e3d7ea1288c68a55efead365e5a760f37d487015..06d044862e589ff05194d695bfdc610ed7e62195 100644 (file)
@@ -897,7 +897,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
        struct sk_buff **queue;
        int i;
 
-       n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT);
+       n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
        if (!n)
                return -ENOMEM;
        vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
index fd6c8b66f06fd97734bfdad5917c7f65de70265d..ff02a942c4d5fb31bf24bc2fd92270592daa119d 100644 (file)
@@ -1404,7 +1404,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
        struct vhost_virtqueue **vqs;
        int r = -ENOMEM, i;
 
-       vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+       vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
        if (!vs) {
                vs = vzalloc(sizeof(*vs));
                if (!vs)
index 3f63e03de8e8099f1a4c1c23b5d5fdfe7eaca634..c9de9c41aa9769863e8164ec90478292f52532ba 100644 (file)
@@ -508,7 +508,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
        /* This struct is large and allocation could fail, fall back to vmalloc
         * if there is no other way.
         */
-       vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_REPEAT);
+       vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
        if (!vsock)
                return -ENOMEM;
 
index 6b444400a86c2252c7f0c165869113f8393ca1a3..ffc391208b27152ffcea7ce1a6842ecceb754597 100644 (file)
@@ -907,7 +907,7 @@ static void intelfb_pci_unregister(struct pci_dev *pdev)
  *                       helper functions                      *
  ***************************************************************/
 
-int __inline__ intelfb_var_to_depth(const struct fb_var_screeninfo *var)
+__inline__ int intelfb_var_to_depth(const struct fb_var_screeninfo *var)
 {
        DBG_MSG("intelfb_var_to_depth: bpp: %d, green.length is %d\n",
                var->bits_per_pixel, var->green.length);
index e3d9b9ea5498e0f840c5189bd1401200494d9ae2..938cba0d24aef7effc7af0fe1e852780cd60d9d0 100644 (file)
@@ -79,12 +79,12 @@ static struct omap_lcd_controller {
        unsigned long           vram_size;
 } lcdc;
 
-static void inline enable_irqs(int mask)
+static inline void enable_irqs(int mask)
 {
        lcdc.irq_mask |= mask;
 }
 
-static void inline disable_irqs(int mask)
+static inline void disable_irqs(int mask)
 {
        lcdc.irq_mask &= ~mask;
 }
@@ -466,7 +466,7 @@ static void calc_ck_div(int is_tft, int pck, int *pck_div)
        }
 }
 
-static void inline setup_regs(void)
+static inline void setup_regs(void)
 {
        u32 l;
        struct lcd_panel *panel = lcdc.fbdev->panel;
index b0e42b6a96b97e37b03b7b61e9def73e814f421e..7aee6d699fd6b38949df0563281473c3afb445c2 100644 (file)
@@ -80,7 +80,6 @@ config EXPORTFS_BLOCK_OPS
 config FILE_LOCKING
        bool "Enable POSIX file locking API" if EXPERT
        default y
-       select PERCPU_RWSEM
        help
          This option enables standard file locking support, required
           for filesystems like NFS and for the flock() system
index 25e312cb60716caef70a66f12eb391f8aa234005..9a69392f1fb375c4c16b07332248edf7da57c9b6 100644 (file)
@@ -419,7 +419,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
                if (i_sblock > info->si_blocks ||
                        i_eblock > info->si_blocks ||
                        i_sblock > i_eblock ||
-                       i_eoff > s_size ||
+                       (i_eoff != le32_to_cpu(-1) && i_eoff > s_size) ||
                        i_sblock * BFS_BSIZE > i_eoff) {
 
                        printf("Inode 0x%08x corrupted\n", i);
index a6d194831ed86af3d2a9c900a3e3f7e397f019cc..e767e4389cb13da3525a1e3ec9c2b4e7501f110c 100644 (file)
@@ -960,10 +960,14 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
        mutex_lock(&ep->mtx);
        for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
                struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+               struct inode *inode = file_inode(epi->ffd.file);
 
-               seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+               seq_printf(m, "tfd: %8d events: %8x data: %16llx "
+                          " pos:%lli ino:%lx sdev:%x\n",
                           epi->ffd.fd, epi->event.events,
-                          (long long)epi->event.data);
+                          (long long)epi->event.data,
+                          (long long)epi->ffd.file->f_pos,
+                          inode->i_ino, inode->i_sb->s_dev);
                if (seq_has_overflowed(m))
                        break;
        }
@@ -1073,6 +1077,50 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
        return epir;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
+{
+       struct rb_node *rbp;
+       struct epitem *epi;
+
+       for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+               epi = rb_entry(rbp, struct epitem, rbn);
+               if (epi->ffd.fd == tfd) {
+                       if (toff == 0)
+                               return epi;
+                       else
+                               toff--;
+               }
+               cond_resched();
+       }
+
+       return NULL;
+}
+
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
+                                    unsigned long toff)
+{
+       struct file *file_raw;
+       struct eventpoll *ep;
+       struct epitem *epi;
+
+       if (!is_file_epoll(file))
+               return ERR_PTR(-EINVAL);
+
+       ep = file->private_data;
+
+       mutex_lock(&ep->mtx);
+       epi = ep_find_tfd(ep, tfd, toff);
+       if (epi)
+               file_raw = epi->ffd.file;
+       else
+               file_raw = ERR_PTR(-ENOENT);
+       mutex_unlock(&ep->mtx);
+
+       return file_raw;
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
 /*
  * This is the callback that is passed to the wait queue wakeup
  * mechanism. It is called by the stored file descriptors when they
index 8b426f83909f620454ed6827b3ebe3125dd883a3..245c430a2e418b1ccaea562b8ce0f7425bc66c3e 100644 (file)
@@ -380,8 +380,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
                struct page *page = radix_tree_deref_slot_protected(slot,
                                                        &mapping->tree_lock);
                if (likely(page) && PageDirty(page)) {
-                       __dec_wb_stat(old_wb, WB_RECLAIMABLE);
-                       __inc_wb_stat(new_wb, WB_RECLAIMABLE);
+                       dec_wb_stat(old_wb, WB_RECLAIMABLE);
+                       inc_wb_stat(new_wb, WB_RECLAIMABLE);
                }
        }
 
@@ -391,8 +391,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
                                                        &mapping->tree_lock);
                if (likely(page)) {
                        WARN_ON_ONCE(!PageWriteback(page));
-                       __dec_wb_stat(old_wb, WB_WRITEBACK);
-                       __inc_wb_stat(new_wb, WB_WRITEBACK);
+                       dec_wb_stat(old_wb, WB_WRITEBACK);
+                       inc_wb_stat(new_wb, WB_WRITEBACK);
                }
        }
 
index f1e1927ccd484e7372fe2a38db7455468bbf06e8..88b773f318cd06f31ca6262bb5949728c199ade0 100644 (file)
@@ -1355,6 +1355,53 @@ static const struct file_operations proc_fault_inject_operations = {
        .write          = proc_fault_inject_write,
        .llseek         = generic_file_llseek,
 };
+
+static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
+                                  size_t count, loff_t *ppos)
+{
+       struct task_struct *task;
+       int err, n;
+
+       task = get_proc_task(file_inode(file));
+       if (!task)
+               return -ESRCH;
+       put_task_struct(task);
+       if (task != current)
+               return -EPERM;
+       err = kstrtoint_from_user(buf, count, 10, &n);
+       if (err)
+               return err;
+       if (n < 0 || n == INT_MAX)
+               return -EINVAL;
+       current->fail_nth = n + 1;
+       return count;
+}
+
+static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
+                                 size_t count, loff_t *ppos)
+{
+       struct task_struct *task;
+       int err;
+
+       task = get_proc_task(file_inode(file));
+       if (!task)
+               return -ESRCH;
+       put_task_struct(task);
+       if (task != current)
+               return -EPERM;
+       if (count < 1)
+               return -EINVAL;
+       err = put_user((char)(current->fail_nth ? 'N' : 'Y'), buf);
+       if (err)
+               return err;
+       current->fail_nth = 0;
+       return 1;
+}
+
+static const struct file_operations proc_fail_nth_operations = {
+       .read           = proc_fail_nth_read,
+       .write          = proc_fail_nth_write,
+};
 #endif
 
 
@@ -3311,6 +3358,11 @@ static const struct pid_entry tid_base_stuff[] = {
 #endif
 #ifdef CONFIG_FAULT_INJECTION
        REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
+       /*
+        * Operations on the file check that the task is current,
+        * so we create it with 0666 to support testing under an unprivileged user.
+        */
+       REG("fail-nth", 0666, proc_fail_nth_operations),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
        ONE("io",       S_IRUSR, proc_tid_io_accounting),
index 9bf06e2b12846a9765027cc47d0ef8f6d9bd734a..8f479229b349d3f629884b51864279f31593bee8 100644 (file)
@@ -1078,16 +1078,30 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
        return -EINVAL;
 }
 
+static int sysctl_check_table_array(const char *path, struct ctl_table *table)
+{
+       int err = 0;
+
+       if ((table->proc_handler == proc_douintvec) ||
+           (table->proc_handler == proc_douintvec_minmax)) {
+               if (table->maxlen != sizeof(unsigned int))
+                       err |= sysctl_err(path, table, "array not allowed");
+       }
+
+       return err;
+}
+
 static int sysctl_check_table(const char *path, struct ctl_table *table)
 {
        int err = 0;
        for (; table->procname; table++) {
                if (table->child)
-                       err = sysctl_err(path, table, "Not a file");
+                       err |= sysctl_err(path, table, "Not a file");
 
                if ((table->proc_handler == proc_dostring) ||
                    (table->proc_handler == proc_dointvec) ||
                    (table->proc_handler == proc_douintvec) ||
+                   (table->proc_handler == proc_douintvec_minmax) ||
                    (table->proc_handler == proc_dointvec_minmax) ||
                    (table->proc_handler == proc_dointvec_jiffies) ||
                    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
@@ -1095,15 +1109,17 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
                    (table->proc_handler == proc_doulongvec_minmax) ||
                    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
                        if (!table->data)
-                               err = sysctl_err(path, table, "No data");
+                               err |= sysctl_err(path, table, "No data");
                        if (!table->maxlen)
-                               err = sysctl_err(path, table, "No maxlen");
+                               err |= sysctl_err(path, table, "No maxlen");
+                       else
+                               err |= sysctl_check_table_array(path, table);
                }
                if (!table->proc_handler)
-                       err = sysctl_err(path, table, "No proc_handler");
+                       err |= sysctl_err(path, table, "No proc_handler");
 
                if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
-                       err = sysctl_err(path, table, "bogus .mode 0%o",
+                       err |= sysctl_err(path, table, "bogus .mode 0%o",
                                table->mode);
        }
        return err;
index d6ea520162b26530bbb560b667d925b6c6690b63..4d85992d75b225968d0055aec9d48888dfdd2dd8 100644 (file)
@@ -54,6 +54,16 @@ kmem_flags_convert(xfs_km_flags_t flags)
                        lflags &= ~__GFP_FS;
        }
 
+       /*
+        * Default page/slab allocator behavior is to retry forever
+        * for small allocations. We can override this behavior by using
+        * __GFP_RETRY_MAYFAIL, which tells the allocator to retry as long
+        * as it is feasible, but to fail rather than retry forever, for all
+        * request sizes.
+        */
+       if (flags & KM_MAYFAIL)
+               lflags |= __GFP_RETRY_MAYFAIL;
+
        if (flags & KM_ZERO)
                lflags |= __GFP_ZERO;
 
index 334165c911f0ae78dc16d95fb1e98e0f5d6144d4..854e1bdd0b2a4978aa739d056103400ed65e1381 100644 (file)
@@ -69,34 +69,14 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
        percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
 }
 
-static inline void __inc_wb_stat(struct bdi_writeback *wb,
-                                enum wb_stat_item item)
-{
-       __add_wb_stat(wb, item, 1);
-}
-
 static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __inc_wb_stat(wb, item);
-       local_irq_restore(flags);
-}
-
-static inline void __dec_wb_stat(struct bdi_writeback *wb,
-                                enum wb_stat_item item)
-{
-       __add_wb_stat(wb, item, -1);
+       __add_wb_stat(wb, item, 1);
 }
 
 static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __dec_wb_stat(wb, item);
-       local_irq_restore(flags);
+       __add_wb_stat(wb, item, -1);
 }
 
 static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
index 4090a42578a8f4b047930508d5655b603c31bdd5..2df2118fbe13568c66bc93f5f249d6b9bc39f7b2 100644 (file)
@@ -19,7 +19,7 @@
                                     CRASH_CORE_NOTE_NAME_BYTES +       \
                                     CRASH_CORE_NOTE_DESC_BYTES)
 
-#define VMCOREINFO_BYTES          (4096)
+#define VMCOREINFO_BYTES          PAGE_SIZE
 #define VMCOREINFO_NOTE_NAME      "VMCOREINFO"
 #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
 #define VMCOREINFO_NOTE_SIZE      ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +  \
@@ -28,6 +28,7 @@
 
 typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
 
+void crash_update_vmcoreinfo_safecopy(void *ptr);
 void crash_save_vmcoreinfo(void);
 void arch_crash_save_vmcoreinfo(void);
 __printf(1, 2)
@@ -56,9 +57,7 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 #define VMCOREINFO_CONFIG(name) \
        vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
-extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-extern size_t vmcoreinfo_size;
-extern size_t vmcoreinfo_max_size;
+extern u32 *vmcoreinfo_note;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
                          void *data, size_t data_len);
index 025727bf679745e507bb427f14198bf1e607ae8e..c706eaac692e8129ce4b0a9d41b298eb3c3756c8 100644 (file)
@@ -592,8 +592,8 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
 }
 
 struct name_snapshot {
-       const char *name;
-       char inline_name[DNAME_INLINE_LEN];
+       const unsigned char *name;
+       unsigned char inline_name[DNAME_INLINE_LEN];
 };
 void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
 void release_dentry_name_snapshot(struct name_snapshot *);
index 6daf6d4971f65266c3c83e684af231dbafc452c0..2f14ac73d01d5494058aefd376bdc9328f93fd93 100644 (file)
@@ -14,6 +14,7 @@
 #define _LINUX_EVENTPOLL_H
 
 #include <uapi/linux/eventpoll.h>
+#include <uapi/linux/kcmp.h>
 
 
 /* Forward declarations to avoid compiler errors */
@@ -22,6 +23,10 @@ struct file;
 
 #ifdef CONFIG_EPOLL
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
+#endif
+
 /* Used to initialize the epoll bits inside the "struct file" */
 static inline void eventpoll_init_file(struct file *file)
 {
index 4c6656f1fee7a7bc2f0eccab8ecf39de8c91f404..bcfb9f7c46f5dc3d1d8f6d8f2f454e09243a610d 100644 (file)
@@ -25,7 +25,7 @@ struct vm_area_struct;
 #define ___GFP_FS              0x80u
 #define ___GFP_COLD            0x100u
 #define ___GFP_NOWARN          0x200u
-#define ___GFP_REPEAT          0x400u
+#define ___GFP_RETRY_MAYFAIL   0x400u
 #define ___GFP_NOFAIL          0x800u
 #define ___GFP_NORETRY         0x1000u
 #define ___GFP_MEMALLOC                0x2000u
@@ -136,26 +136,56 @@ struct vm_area_struct;
  *
  * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
  *
- * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
- *   _might_ fail.  This depends upon the particular VM implementation.
+ * The default allocator behavior depends on the request size. We have a concept
+ * of so-called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER).
+ * !costly allocations are too essential to fail, so they are implicitly
+ * non-failing by default (with some exceptions, e.g. OOM victims might fail,
+ * so the caller still has to check for failures), while costly requests try
+ * not to be disruptive and back off even without invoking the OOM killer.
+ * The following three modifiers might be used to override some of these
+ * implicit rules.
+ *
+ * __GFP_NORETRY: The VM implementation will try only very lightweight
+ *   memory direct reclaim to get some memory under memory pressure (thus
+ *   it can sleep). It will avoid disruptive actions like OOM killer. The
+ *   caller must handle the failure which is quite likely to happen under
+ *   heavy memory pressure. The flag is suitable when failure can easily be
+ *   handled at small cost, such as reduced throughput.
+ *
+ * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ *   procedures that have previously failed if there is some indication
+ *   that progress has been made elsewhere.  It can wait for other
+ *   tasks to attempt high level approaches to freeing memory such as
+ *   compaction (which removes fragmentation) and page-out.
+ *   There is still a definite limit to the number of retries, but it is
+ *   a larger limit than with __GFP_NORETRY.
+ *   Allocations with this flag may fail, but only when there is
+ *   genuinely little unused memory. While these allocations do not
+ *   directly trigger the OOM killer, their failure indicates that
+ *   the system is likely to need to use the OOM killer soon.  The
+ *   caller must handle failure, but can reasonably do so by failing
+ *   a higher-level request, or completing it only in a much less
+ *   efficient manner.
+ *   If the allocation does fail, and the caller is in a position to
+ *   free some non-essential memory, doing so could benefit the system
+ *   as a whole.
  *
  * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- *   cannot handle allocation failures. New users should be evaluated carefully
- *   (and the flag should be used only when there is no reasonable failure
- *   policy) but it is definitely preferable to use the flag rather than
- *   opencode endless loop around allocator.
- *
- * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
- *   return NULL when direct reclaim and memory compaction have failed to allow
- *   the allocation to succeed.  The OOM killer is not called with the current
- *   implementation.
+ *   cannot handle allocation failures. The allocation could block
+ *   indefinitely but will never return with failure. Testing for
+ *   failure is pointless.
+ *   New users should be evaluated carefully (and the flag should be
+ *   used only when there is no reasonable failure policy) but it is
+ *   definitely preferable to use the flag rather than opencode endless
+ *   loop around allocator.
+ *   Using this flag for costly allocations is _highly_ discouraged.
  */
 #define __GFP_IO       ((__force gfp_t)___GFP_IO)
 #define __GFP_FS       ((__force gfp_t)___GFP_FS)
 #define __GFP_DIRECT_RECLAIM   ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
 #define __GFP_KSWAPD_RECLAIM   ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
 #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
-#define __GFP_REPEAT   ((__force gfp_t)___GFP_REPEAT)
+#define __GFP_RETRY_MAYFAIL    ((__force gfp_t)___GFP_RETRY_MAYFAIL)
 #define __GFP_NOFAIL   ((__force gfp_t)___GFP_NOFAIL)
 #define __GFP_NORETRY  ((__force gfp_t)___GFP_NORETRY)
 
index 71fd92d81b266edf1781c542de550159bc33949a..5591f055e13fd0777da082846331b6d106e3a33d 100644 (file)
@@ -20,6 +20,9 @@ struct kern_ipc_perm {
        umode_t         mode;
        unsigned long   seq;
        void            *security;
+
+       struct rcu_head rcu;
+       atomic_t refcount;
 } ____cacheline_aligned_in_smp;
 
 #endif /* _LINUX_IPC_H */
index 1c91f26e2996dc4db7f3814a4e82de80907bb5a9..bd6d96cf80b17cae9b4e15cb1e05e2a455ffd88a 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/log2.h>
 #include <linux/typecheck.h>
 #include <linux/printk.h>
+#include <linux/build_bug.h>
 #include <asm/byteorder.h>
 #include <uapi/linux/kernel.h>
 
@@ -854,9 +855,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * @member:    the name of the member within the struct.
  *
  */
-#define container_of(ptr, type, member) ({                     \
-       const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
-       (type *)( (char *)__mptr - offsetof(type,member) );})
+#define container_of(ptr, type, member) ({                             \
+       void *__mptr = (void *)(ptr);                                   \
+       BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&   \
+                        !__same_type(*(ptr), void),                    \
+                        "pointer type mismatch in container_of()");    \
+       ((type *)(__mptr - offsetof(type, member))); })
 
 /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
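With the stricter container_of() above, passing a pointer whose type matches neither the named member nor void now stops the build with the message shown in the macro, rather than at best an incompatible-pointer warning from the old __mptr assignment. A minimal sketch with a hypothetical structure:

	#include <linux/kernel.h>
	#include <linux/list.h>

	struct item {
		int key;
		struct list_head node;
	};

	static inline struct item *item_from_node(struct list_head *lh)
	{
		return container_of(lh, struct item, node);	/* fine: *lh matches 'node' */
	}

	static inline struct item *item_from_key(int *key)
	{
		/* Build error: "pointer type mismatch in container_of()" */
		return container_of(key, struct item, node);
	}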
index 65888418fb694df2d2cb93531ca5139905dd537a..dd056fab9e35c958c7aee14156e1df5640fb2556 100644 (file)
@@ -172,6 +172,7 @@ struct kimage {
        unsigned long start;
        struct page *control_code_page;
        struct page *swap_page;
+       void *vmcoreinfo_data_copy; /* located in the crash memory */
 
        unsigned long nr_segments;
        struct kexec_segment segment[KEXEC_SEGMENT_MAX];
@@ -241,6 +242,7 @@ extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 int kexec_crash_loaded(void);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
+extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
 
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
index 4634da521238f18d76155d8e92870f587f6da163..3e0d405dc842d5dd65b28cb710b4ce38008ca64b 100644 (file)
@@ -34,7 +34,7 @@ extern char *migrate_reason_names[MR_TYPES];
 static inline struct page *new_page_nodemask(struct page *page,
                                int preferred_nid, nodemask_t *nodemask)
 {
-       gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
+       gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
 
        if (PageHuge(page))
                return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
index aa3cd0878270380cdee0b8d26be908594244ad0b..8aa01fd859fb84e5f64e9d7ffb0c20957afdfd2d 100644 (file)
@@ -6,18 +6,26 @@
 
 #include <linux/sched.h>
 #include <asm/irq.h>
+#if defined(CONFIG_HAVE_NMI_WATCHDOG)
+#include <asm/nmi.h>
+#endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
+void lockup_detector_init(void);
+#else
+static inline void lockup_detector_init(void)
+{
+}
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 extern void touch_softlockup_watchdog_sched(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
-                                 void __user *buffer,
-                                 size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
-extern unsigned int  hardlockup_panic;
-void lockup_detector_init(void);
+extern int soft_watchdog_enabled;
+extern atomic_t watchdog_park_in_progress;
 #else
 static inline void touch_softlockup_watchdog_sched(void)
 {
@@ -31,9 +39,6 @@ static inline void touch_softlockup_watchdog_sync(void)
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
-static inline void lockup_detector_init(void)
-{
-}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
@@ -61,6 +66,21 @@ static inline void reset_hung_task_detector(void)
 #define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
 #define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
 
+#if defined(CONFIG_HARDLOCKUP_DETECTOR)
+extern void hardlockup_detector_disable(void);
+extern unsigned int hardlockup_panic;
+#else
+static inline void hardlockup_detector_disable(void) {}
+#endif
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
+extern void arch_touch_nmi_watchdog(void);
+#else
+#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+#endif
+
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
  * 
@@ -68,21 +88,11 @@ static inline void reset_hung_task_detector(void)
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
  */
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-#include <asm/nmi.h>
-extern void touch_nmi_watchdog(void);
-#else
 static inline void touch_nmi_watchdog(void)
 {
+       arch_touch_nmi_watchdog();
        touch_softlockup_watchdog();
 }
-#endif
-
-#if defined(CONFIG_HARDLOCKUP_DETECTOR)
-extern void hardlockup_detector_disable(void);
-#else
-static inline void hardlockup_detector_disable(void) {}
-#endif
 
 /*
  * Create trigger_all_cpu_backtrace() out of the arch-provided
@@ -139,15 +149,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
 }
 #endif
 
-#ifdef CONFIG_LOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
+#endif
+
+#ifdef CONFIG_LOCKUP_DETECTOR
 extern int nmi_watchdog_enabled;
-extern int soft_watchdog_enabled;
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
+extern struct cpumask watchdog_cpumask;
 extern unsigned long *watchdog_cpumask_bits;
-extern atomic_t watchdog_park_in_progress;
+extern int __read_mostly watchdog_suspended;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
index 7fa1fbefc3f229618f837be52a5ee3b619ec1f9c..cc7554ae6e8bd6dbe3985fc5fc23fa8b6d68e765 100644 (file)
@@ -31,7 +31,7 @@ struct s3c2410_hcd_info {
        void            (*report_oc)(struct s3c2410_hcd_info *, int ports);
 };
 
-static void inline s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
+static inline void s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
 {
        if (info->report_oc != NULL) {
                (info->report_oc)(info, ports);
index ed5c3838780de5ba9509071bef56e8d521dc5782..1fa0dc880bd7878e70200264e2486a579dfe1332 100644 (file)
@@ -57,6 +57,27 @@ static inline unsigned long get_random_long(void)
 #endif
 }
 
+/*
+ * On 64-bit architectures, protect against non-terminated C string overflows
+ * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
+ */
+#ifdef CONFIG_64BIT
+# ifdef __LITTLE_ENDIAN
+#  define CANARY_MASK 0xffffffffffffff00UL
+# else /* big endian, 64 bits: */
+#  define CANARY_MASK 0x00ffffffffffffffUL
+# endif
+#else /* 32 bits: */
+# define CANARY_MASK 0xffffffffUL
+#endif
+
+static inline unsigned long get_random_canary(void)
+{
+       unsigned long val = get_random_long();
+
+       return val & CANARY_MASK;
+}
+
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);
index 20814b7d7d70ebb40471e1eff9a27f865db0f635..3822d749fc9ee8263ba8daadde5cec5edfb7516d 100644 (file)
@@ -974,6 +974,7 @@ struct task_struct {
 
 #ifdef CONFIG_FAULT_INJECTION
        int                             make_it_fail;
+       int fail_nth;
 #endif
        /*
         * When (nr_dirtied >= nr_dirtied_pause), it's time to call
index 9edec926e9d968a3900ec0f683b039a6cf6f6ef7..be5cf2ea14ade9d40e88114ea27a5c876423c8a0 100644 (file)
@@ -8,11 +8,29 @@
 
 struct task_struct;
 
+/* One semaphore structure for each semaphore in the system. */
+struct sem {
+       int     semval;         /* current value */
+       /*
+        * PID of the process that last modified the semaphore. For
+        * Linux, specifically these are:
+        *  - semop
+        *  - semctl, via SETVAL and SETALL.
+        *  - at task exit when performing undo adjustments (see exit_sem).
+        */
+       int     sempid;
+       spinlock_t      lock;   /* spinlock for fine-grained semtimedop */
+       struct list_head pending_alter; /* pending single-sop operations */
+                                       /* that alter the semaphore */
+       struct list_head pending_const; /* pending single-sop operations */
+                                       /* that do not alter the semaphore*/
+       time_t  sem_otime;      /* candidate for sem_otime */
+} ____cacheline_aligned_in_smp;
+
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
        struct kern_ipc_perm    sem_perm;       /* permissions .. see ipc.h */
-       time_t                  sem_ctime;      /* last change time */
-       struct sem              *sem_base;      /* ptr to first semaphore in array */
+       time_t                  sem_ctime;      /* create/last semctl() time */
        struct list_head        pending_alter;  /* pending operations */
                                                /* that alter the array */
        struct list_head        pending_const;  /* pending complex operations */
@@ -21,6 +39,8 @@ struct sem_array {
        int                     sem_nsems;      /* no. of semaphores in array */
        int                     complex_count;  /* pending complex operations */
        unsigned int            use_global_lock;/* >0: global lock required */
+
+       struct sem              sems[];
 };
 
 #ifdef CONFIG_SYSVIPC
index 04a7f7993e678d6454b6efaa608db3b88ea78eec..41473df6dfb074a8c0a193716b0ac2a1d43151d3 100644 (file)
@@ -471,7 +471,8 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
  *
  * %__GFP_NOWARN - If allocation fails, don't issue any warnings.
  *
- * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
+ * %__GFP_RETRY_MAYFAIL - Try really hard to satisfy the allocation but fail
+ *   eventually.
  *
  * There are other flags available as well, but these are not intended
  * for general use, and so are not documented here. For a full list of
index 7439d83eaa3365f3f67cbb70694a168554ac17b7..96f5a5fd037747cb22ec6e815d090449eb3d9900 100644 (file)
@@ -193,4 +193,204 @@ static inline const char *kbasename(const char *path)
        return tail ? tail + 1 : path;
 }
 
+#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
+#define __RENAME(x) __asm__(#x)
+
+void fortify_panic(const char *name) __noreturn __cold;
+void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
+void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
+void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
+
+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       size_t q_size = __builtin_object_size(q, 0);
+       if (p_size == (size_t)-1 && q_size == (size_t)-1)
+               return __builtin_strcpy(p, q);
+       if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0)
+               fortify_panic(__func__);
+       return p;
+}
+
+__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       if (__builtin_constant_p(size) && p_size < size)
+               __write_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __builtin_strncpy(p, q, size);
+}
+
+__FORTIFY_INLINE char *strcat(char *p, const char *q)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       if (p_size == (size_t)-1)
+               return __builtin_strcat(p, q);
+       if (strlcat(p, q, p_size) >= p_size)
+               fortify_panic(__func__);
+       return p;
+}
+
+__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
+{
+       __kernel_size_t ret;
+       size_t p_size = __builtin_object_size(p, 0);
+       if (p_size == (size_t)-1)
+               return __builtin_strlen(p);
+       ret = strnlen(p, p_size);
+       if (p_size <= ret)
+               fortify_panic(__func__);
+       return ret;
+}
+
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+       if (p_size <= ret && maxlen != ret)
+               fortify_panic(__func__);
+       return ret;
+}
+
+/* defined after fortified strlen to reuse it */
+extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
+{
+       size_t ret;
+       size_t p_size = __builtin_object_size(p, 0);
+       size_t q_size = __builtin_object_size(q, 0);
+       if (p_size == (size_t)-1 && q_size == (size_t)-1)
+               return __real_strlcpy(p, q, size);
+       ret = strlen(q);
+       if (size) {
+               size_t len = (ret >= size) ? size - 1 : ret;
+               if (__builtin_constant_p(len) && len >= p_size)
+                       __write_overflow();
+               if (len >= p_size)
+                       fortify_panic(__func__);
+               __builtin_memcpy(p, q, len);
+               p[len] = '\0';
+       }
+       return ret;
+}
+
+/* defined after fortified strlen and strnlen to reuse them */
+__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
+{
+       size_t p_len, copy_len;
+       size_t p_size = __builtin_object_size(p, 0);
+       size_t q_size = __builtin_object_size(q, 0);
+       if (p_size == (size_t)-1 && q_size == (size_t)-1)
+               return __builtin_strncat(p, q, count);
+       p_len = strlen(p);
+       copy_len = strnlen(q, count);
+       if (p_size < p_len + copy_len + 1)
+               fortify_panic(__func__);
+       __builtin_memcpy(p + p_len, q, copy_len);
+       p[p_len + copy_len] = '\0';
+       return p;
+}
+
+__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       if (__builtin_constant_p(size) && p_size < size)
+               __write_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __builtin_memset(p, c, size);
+}
+
+__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       size_t q_size = __builtin_object_size(q, 0);
+       if (__builtin_constant_p(size)) {
+               if (p_size < size)
+                       __write_overflow();
+               if (q_size < size)
+                       __read_overflow2();
+       }
+       if (p_size < size || q_size < size)
+               fortify_panic(__func__);
+       return __builtin_memcpy(p, q, size);
+}
+
+__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       size_t q_size = __builtin_object_size(q, 0);
+       if (__builtin_constant_p(size)) {
+               if (p_size < size)
+                       __write_overflow();
+               if (q_size < size)
+                       __read_overflow2();
+       }
+       if (p_size < size || q_size < size)
+               fortify_panic(__func__);
+       return __builtin_memmove(p, q, size);
+}
+
+extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
+__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       if (__builtin_constant_p(size) && p_size < size)
+               __read_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __real_memscan(p, c, size);
+}
+
+__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       size_t q_size = __builtin_object_size(q, 0);
+       if (__builtin_constant_p(size)) {
+               if (p_size < size)
+                       __read_overflow();
+               if (q_size < size)
+                       __read_overflow2();
+       }
+       if (p_size < size || q_size < size)
+               fortify_panic(__func__);
+       return __builtin_memcmp(p, q, size);
+}
+
+__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       if (__builtin_constant_p(size) && p_size < size)
+               __read_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __builtin_memchr(p, c, size);
+}
+
+void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
+__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       if (__builtin_constant_p(size) && p_size < size)
+               __read_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __real_memchr_inv(p, c, size);
+}
+
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       if (__builtin_constant_p(size) && p_size < size)
+               __read_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __real_kmemdup(p, size, gfp);
+}
+#endif
+
 #endif /* _LINUX_STRING_H_ */
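A minimal sketch of what the fortified helpers above catch, assuming CONFIG_FORTIFY_SOURCE=y and an optimizing build (the function below is illustrative only): an overflow with a compile-time-constant size is rejected at build time via __write_overflow(), while an overflow detected only at run time ends in fortify_panic().

    static void fortify_demo(void)
    {
            char dst[8];

            /* 16 > __builtin_object_size(dst, 0): fails to compile. */
            memset(dst, 0, 16);
    }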
index 1c04a26bfd2f017408e4d476f073788c7139fc21..3a89b9ff4cdc586a7f657f57ef816a431599aeda 100644 (file)
@@ -47,6 +47,9 @@ extern int proc_douintvec(struct ctl_table *, int,
                         void __user *, size_t *, loff_t *);
 extern int proc_dointvec_minmax(struct ctl_table *, int,
                                void __user *, size_t *, loff_t *);
+extern int proc_douintvec_minmax(struct ctl_table *table, int write,
+                                void __user *buffer, size_t *lenp,
+                                loff_t *ppos);
 extern int proc_dointvec_jiffies(struct ctl_table *, int,
                                 void __user *, size_t *, loff_t *);
 extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
index 10e3663a75a6722f0c7108d3e3ba87080e582102..8e50d01c645fc7522dc0d6f74e80098c3c36f3dc 100644 (file)
@@ -34,7 +34,7 @@
        {(unsigned long)__GFP_FS,               "__GFP_FS"},            \
        {(unsigned long)__GFP_COLD,             "__GFP_COLD"},          \
        {(unsigned long)__GFP_NOWARN,           "__GFP_NOWARN"},        \
-       {(unsigned long)__GFP_REPEAT,           "__GFP_REPEAT"},        \
+       {(unsigned long)__GFP_RETRY_MAYFAIL,    "__GFP_RETRY_MAYFAIL"}, \
        {(unsigned long)__GFP_NOFAIL,           "__GFP_NOFAIL"},        \
        {(unsigned long)__GFP_NORETRY,          "__GFP_NORETRY"},       \
        {(unsigned long)__GFP_COMP,             "__GFP_COMP"},          \
index 84df14b373601486bdc2b684c49a2ae8b25e3147..481e103da78ed42a5a76447e836ee8374541d7c7 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _UAPI_LINUX_KCMP_H
 #define _UAPI_LINUX_KCMP_H
 
+#include <linux/types.h>
+
 /* Comparison type */
 enum kcmp_type {
        KCMP_FILE,
@@ -10,8 +12,16 @@ enum kcmp_type {
        KCMP_SIGHAND,
        KCMP_IO,
        KCMP_SYSVSEM,
+       KCMP_EPOLL_TFD,
 
        KCMP_TYPES,
 };
 
+/* Slot for KCMP_EPOLL_TFD */
+struct kcmp_epoll_slot {
+       __u32 efd;              /* epoll file descriptor */
+       __u32 tfd;              /* target file number */
+       __u32 toff;             /* target offset within same numbered sequence */
+};
+
 #endif /* _UAPI_LINUX_KCMP_H */
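A userspace sketch of the new mode (the pid/fd variables are hypothetical): idx1 names a file descriptor in the first task, and idx2 carries a pointer to a struct kcmp_epoll_slot describing the epoll target in the second task; a return value of 0 means both refer to the same struct file.

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/kcmp.h>

    static int epoll_target_is_same_file(pid_t pid1, pid_t pid2,
                                         int fd1, int efd, int tfd)
    {
            struct kcmp_epoll_slot slot = {
                    .efd    = efd,
                    .tfd    = tfd,
                    .toff   = 0,
            };

            return syscall(SYS_kcmp, pid1, pid2, KCMP_EPOLL_TFD,
                           fd1, (unsigned long)&slot) == 0;
    }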
index dd73b908b2f3225d7a7a0611c97cca8eb36295ee..67eb90361692fba63a0729b188011d02b39596ab 100644 (file)
@@ -23,7 +23,7 @@
 struct semid_ds {
        struct ipc_perm sem_perm;               /* permissions .. see ipc.h */
        __kernel_time_t sem_otime;              /* last semop time */
-       __kernel_time_t sem_ctime;              /* last change time */
+       __kernel_time_t sem_ctime;              /* create/last semctl() time */
        struct sem      *sem_base;              /* ptr to first semaphore in array */
        struct sem_queue *sem_pending;          /* pending operations to be processed */
        struct sem_queue **sem_pending_last;    /* last pending operation */
index df58a416dd1da54df4c3f2be807adb5366ec35b2..052481fbe3633f64b420c5bbd6deea3be261e6a9 100644 (file)
@@ -518,6 +518,7 @@ asmlinkage __visible void __init start_kernel(void)
        /*
         * Set up the initial canary ASAP:
         */
+       add_latent_entropy();
        boot_init_stack_canary();
 
        cgroup_init_early();
index 104926dc72be4e9ae53fbecbbd6e05ee1f81cda3..5b25e0755656a6a2b85a7f647bf77cd87db77a5a 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -97,11 +97,11 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 
 static void msg_rcu_free(struct rcu_head *head)
 {
-       struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-       struct msg_queue *msq = ipc_rcu_to_struct(p);
+       struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+       struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
 
        security_msg_queue_free(msq);
-       ipc_rcu_free(head);
+       kvfree(msq);
 }
 
 /**
@@ -114,12 +114,12 @@ static void msg_rcu_free(struct rcu_head *head)
 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
        struct msg_queue *msq;
-       int id, retval;
+       int retval;
        key_t key = params->key;
        int msgflg = params->flg;
 
-       msq = ipc_rcu_alloc(sizeof(*msq));
-       if (!msq)
+       msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
+       if (unlikely(!msq))
                return -ENOMEM;
 
        msq->q_perm.mode = msgflg & S_IRWXUGO;
@@ -128,7 +128,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
        msq->q_perm.security = NULL;
        retval = security_msg_queue_alloc(msq);
        if (retval) {
-               ipc_rcu_putref(msq, ipc_rcu_free);
+               kvfree(msq);
                return retval;
        }
 
@@ -142,10 +142,10 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
        INIT_LIST_HEAD(&msq->q_senders);
 
        /* ipc_addid() locks msq upon success. */
-       id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
-       if (id < 0) {
-               ipc_rcu_putref(msq, msg_rcu_free);
-               return id;
+       retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
+       if (retval < 0) {
+               call_rcu(&msq->q_perm.rcu, msg_rcu_free);
+               return retval;
        }
 
        ipc_unlock_object(&msq->q_perm);
@@ -249,7 +249,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
                free_msg(msg);
        }
        atomic_sub(msq->q_cbytes, &ns->msg_bytes);
-       ipc_rcu_putref(msq, msg_rcu_free);
+       ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 }
 
 /*
@@ -688,7 +688,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                /* enqueue the sender and prepare to block */
                ss_add(msq, &s, msgsz);
 
-               if (!ipc_rcu_getref(msq)) {
+               if (!ipc_rcu_getref(&msq->q_perm)) {
                        err = -EIDRM;
                        goto out_unlock0;
                }
@@ -700,7 +700,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                rcu_read_lock();
                ipc_lock_object(&msq->q_perm);
 
-               ipc_rcu_putref(msq, msg_rcu_free);
+               ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
                /* raced with RMID? */
                if (!ipc_valid_object(&msq->q_perm)) {
                        err = -EIDRM;
index 947dc2348271f9b8b373e098932c1e4e351806de..9e70cd7a17da7e74e915ce7f4061aa99aea66b89 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
 #include <linux/uaccess.h>
 #include "util.h"
 
-/* One semaphore structure for each semaphore in the system. */
-struct sem {
-       int     semval;         /* current value */
-       /*
-        * PID of the process that last modified the semaphore. For
-        * Linux, specifically these are:
-        *  - semop
-        *  - semctl, via SETVAL and SETALL.
-        *  - at task exit when performing undo adjustments (see exit_sem).
-        */
-       int     sempid;
-       spinlock_t      lock;   /* spinlock for fine-grained semtimedop */
-       struct list_head pending_alter; /* pending single-sop operations */
-                                       /* that alter the semaphore */
-       struct list_head pending_const; /* pending single-sop operations */
-                                       /* that do not alter the semaphore*/
-       time_t  sem_otime;      /* candidate for sem_otime */
-} ____cacheline_aligned_in_smp;
 
 /* One queue for each sleeping process in the system. */
 struct sem_queue {
@@ -175,7 +157,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
  *     sem_array.sem_undo
  *
  * b) global or semaphore sem_lock() for read/write:
- *     sem_array.sem_base[i].pending_{const,alter}:
+ *     sem_array.sems[i].pending_{const,alter}:
  *
  * c) special:
  *     sem_undo_list.list_proc:
@@ -250,7 +232,7 @@ static void unmerge_queues(struct sem_array *sma)
         */
        list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
                struct sem *curr;
-               curr = &sma->sem_base[q->sops[0].sem_num];
+               curr = &sma->sems[q->sops[0].sem_num];
 
                list_add_tail(&q->list, &curr->pending_alter);
        }
@@ -270,7 +252,7 @@ static void merge_queues(struct sem_array *sma)
 {
        int i;
        for (i = 0; i < sma->sem_nsems; i++) {
-               struct sem *sem = sma->sem_base + i;
+               struct sem *sem = &sma->sems[i];
 
                list_splice_init(&sem->pending_alter, &sma->pending_alter);
        }
@@ -278,11 +260,11 @@ static void merge_queues(struct sem_array *sma)
 
 static void sem_rcu_free(struct rcu_head *head)
 {
-       struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-       struct sem_array *sma = ipc_rcu_to_struct(p);
+       struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+       struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
 
        security_sem_free(sma);
-       ipc_rcu_free(head);
+       kvfree(sma);
 }
 
 /*
@@ -306,7 +288,7 @@ static void complexmode_enter(struct sem_array *sma)
        sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 
        for (i = 0; i < sma->sem_nsems; i++) {
-               sem = sma->sem_base + i;
+               sem = &sma->sems[i];
                spin_lock(&sem->lock);
                spin_unlock(&sem->lock);
        }
@@ -366,7 +348,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
         *
         * Both facts are tracked by use_global_mode.
         */
-       sem = sma->sem_base + sops->sem_num;
+       sem = &sma->sems[sops->sem_num];
 
        /*
         * Initial check for use_global_lock. Just an optimization,
@@ -421,7 +403,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
                complexmode_tryleave(sma);
                ipc_unlock_object(&sma->sem_perm);
        } else {
-               struct sem *sem = sma->sem_base + locknum;
+               struct sem *sem = &sma->sems[locknum];
                spin_unlock(&sem->lock);
        }
 }
@@ -456,7 +438,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
        sem_lock(sma, NULL, -1);
-       ipc_rcu_putref(sma, sem_rcu_free);
+       ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -464,6 +446,24 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
        ipc_rmid(&sem_ids(ns), &s->sem_perm);
 }
 
+static struct sem_array *sem_alloc(size_t nsems)
+{
+       struct sem_array *sma;
+       size_t size;
+
+       if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
+               return NULL;
+
+       size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
+       sma = kvmalloc(size, GFP_KERNEL);
+       if (unlikely(!sma))
+               return NULL;
+
+       memset(sma, 0, size);
+
+       return sma;
+}
+
 /**
  * newary - Create a new semaphore set
  * @ns: namespace
@@ -473,10 +473,8 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
  */
 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 {
-       int id;
        int retval;
        struct sem_array *sma;
-       int size;
        key_t key = params->key;
        int nsems = params->u.nsems;
        int semflg = params->flg;
@@ -487,29 +485,24 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
        if (ns->used_sems + nsems > ns->sc_semmns)
                return -ENOSPC;
 
-       size = sizeof(*sma) + nsems * sizeof(struct sem);
-       sma = ipc_rcu_alloc(size);
+       sma = sem_alloc(nsems);
        if (!sma)
                return -ENOMEM;
 
-       memset(sma, 0, size);
-
        sma->sem_perm.mode = (semflg & S_IRWXUGO);
        sma->sem_perm.key = key;
 
        sma->sem_perm.security = NULL;
        retval = security_sem_alloc(sma);
        if (retval) {
-               ipc_rcu_putref(sma, ipc_rcu_free);
+               kvfree(sma);
                return retval;
        }
 
-       sma->sem_base = (struct sem *) &sma[1];
-
        for (i = 0; i < nsems; i++) {
-               INIT_LIST_HEAD(&sma->sem_base[i].pending_alter);
-               INIT_LIST_HEAD(&sma->sem_base[i].pending_const);
-               spin_lock_init(&sma->sem_base[i].lock);
+               INIT_LIST_HEAD(&sma->sems[i].pending_alter);
+               INIT_LIST_HEAD(&sma->sems[i].pending_const);
+               spin_lock_init(&sma->sems[i].lock);
        }
 
        sma->complex_count = 0;
@@ -520,10 +513,10 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
        sma->sem_nsems = nsems;
        sma->sem_ctime = get_seconds();
 
-       id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
-       if (id < 0) {
-               ipc_rcu_putref(sma, sem_rcu_free);
-               return id;
+       retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
+       if (retval < 0) {
+               call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
+               return retval;
        }
        ns->used_sems += nsems;
 
@@ -612,7 +605,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
        un = q->undo;
 
        for (sop = sops; sop < sops + nsops; sop++) {
-               curr = sma->sem_base + sop->sem_num;
+               curr = &sma->sems[sop->sem_num];
                sem_op = sop->sem_op;
                result = curr->semval;
 
@@ -639,7 +632,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
        sop--;
        pid = q->pid;
        while (sop >= sops) {
-               sma->sem_base[sop->sem_num].sempid = pid;
+               sma->sems[sop->sem_num].sempid = pid;
                sop--;
        }
 
@@ -661,7 +654,7 @@ undo:
        sop--;
        while (sop >= sops) {
                sem_op = sop->sem_op;
-               sma->sem_base[sop->sem_num].semval -= sem_op;
+               sma->sems[sop->sem_num].semval -= sem_op;
                if (sop->sem_flg & SEM_UNDO)
                        un->semadj[sop->sem_num] += sem_op;
                sop--;
@@ -692,7 +685,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
         * until the operations can go through.
         */
        for (sop = sops; sop < sops + nsops; sop++) {
-               curr = sma->sem_base + sop->sem_num;
+               curr = &sma->sems[sop->sem_num];
                sem_op = sop->sem_op;
                result = curr->semval;
 
@@ -716,7 +709,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
        }
 
        for (sop = sops; sop < sops + nsops; sop++) {
-               curr = sma->sem_base + sop->sem_num;
+               curr = &sma->sems[sop->sem_num];
                sem_op = sop->sem_op;
                result = curr->semval;
 
@@ -815,7 +808,7 @@ static int wake_const_ops(struct sem_array *sma, int semnum,
        if (semnum == -1)
                pending_list = &sma->pending_const;
        else
-               pending_list = &sma->sem_base[semnum].pending_const;
+               pending_list = &sma->sems[semnum].pending_const;
 
        list_for_each_entry_safe(q, tmp, pending_list, list) {
                int error = perform_atomic_semop(sma, q);
@@ -856,7 +849,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
                for (i = 0; i < nsops; i++) {
                        int num = sops[i].sem_num;
 
-                       if (sma->sem_base[num].semval == 0) {
+                       if (sma->sems[num].semval == 0) {
                                got_zero = 1;
                                semop_completed |= wake_const_ops(sma, num, wake_q);
                        }
@@ -867,7 +860,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
                 * Assume all were changed.
                 */
                for (i = 0; i < sma->sem_nsems; i++) {
-                       if (sma->sem_base[i].semval == 0) {
+                       if (sma->sems[i].semval == 0) {
                                got_zero = 1;
                                semop_completed |= wake_const_ops(sma, i, wake_q);
                        }
@@ -909,7 +902,7 @@ static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *w
        if (semnum == -1)
                pending_list = &sma->pending_alter;
        else
-               pending_list = &sma->sem_base[semnum].pending_alter;
+               pending_list = &sma->sems[semnum].pending_alter;
 
 again:
        list_for_each_entry_safe(q, tmp, pending_list, list) {
@@ -922,7 +915,7 @@ again:
                 * be in the  per semaphore pending queue, and decrements
                 * cannot be successful if the value is already 0.
                 */
-               if (semnum != -1 && sma->sem_base[semnum].semval == 0)
+               if (semnum != -1 && sma->sems[semnum].semval == 0)
                        break;
 
                error = perform_atomic_semop(sma, q);
@@ -959,9 +952,9 @@ again:
 static void set_semotime(struct sem_array *sma, struct sembuf *sops)
 {
        if (sops == NULL) {
-               sma->sem_base[0].sem_otime = get_seconds();
+               sma->sems[0].sem_otime = get_seconds();
        } else {
-               sma->sem_base[sops[0].sem_num].sem_otime =
+               sma->sems[sops[0].sem_num].sem_otime =
                                                        get_seconds();
        }
 }
@@ -1067,9 +1060,9 @@ static int count_semcnt(struct sem_array *sma, ushort semnum,
        semcnt = 0;
        /* First: check the simple operations. They are easy to evaluate */
        if (count_zero)
-               l = &sma->sem_base[semnum].pending_const;
+               l = &sma->sems[semnum].pending_const;
        else
-               l = &sma->sem_base[semnum].pending_alter;
+               l = &sma->sems[semnum].pending_alter;
 
        list_for_each_entry(q, l, list) {
                /* all task on a per-semaphore list sleep on exactly
@@ -1124,7 +1117,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
                wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
        }
        for (i = 0; i < sma->sem_nsems; i++) {
-               struct sem *sem = sma->sem_base + i;
+               struct sem *sem = &sma->sems[i];
                list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
                        unlink_queue(sma, q);
                        wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
@@ -1142,7 +1135,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
        wake_up_q(&wake_q);
        ns->used_sems -= sma->sem_nsems;
-       ipc_rcu_putref(sma, sem_rcu_free);
+       ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1174,9 +1167,9 @@ static time_t get_semotime(struct sem_array *sma)
        int i;
        time_t res;
 
-       res = sma->sem_base[0].sem_otime;
+       res = sma->sems[0].sem_otime;
        for (i = 1; i < sma->sem_nsems; i++) {
-               time_t to = sma->sem_base[i].sem_otime;
+               time_t to = sma->sems[i].sem_otime;
 
                if (to > res)
                        res = to;
@@ -1325,7 +1318,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
                return -EIDRM;
        }
 
-       curr = &sma->sem_base[semnum];
+       curr = &sma->sems[semnum];
 
        ipc_assert_locked_object(&sma->sem_perm);
        list_for_each_entry(un, &sma->list_id, list_id)
@@ -1382,15 +1375,16 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                        goto out_unlock;
                }
                if (nsems > SEMMSL_FAST) {
-                       if (!ipc_rcu_getref(sma)) {
+                       if (!ipc_rcu_getref(&sma->sem_perm)) {
                                err = -EIDRM;
                                goto out_unlock;
                        }
                        sem_unlock(sma, -1);
                        rcu_read_unlock();
-                       sem_io = ipc_alloc(sizeof(ushort)*nsems);
+                       sem_io = kvmalloc_array(nsems, sizeof(ushort),
+                                               GFP_KERNEL);
                        if (sem_io == NULL) {
-                               ipc_rcu_putref(sma, sem_rcu_free);
+                               ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                                return -ENOMEM;
                        }
 
@@ -1402,7 +1396,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                        }
                }
                for (i = 0; i < sma->sem_nsems; i++)
-                       sem_io[i] = sma->sem_base[i].semval;
+                       sem_io[i] = sma->sems[i].semval;
                sem_unlock(sma, -1);
                rcu_read_unlock();
                err = 0;
@@ -1415,29 +1409,30 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                int i;
                struct sem_undo *un;
 
-               if (!ipc_rcu_getref(sma)) {
+               if (!ipc_rcu_getref(&sma->sem_perm)) {
                        err = -EIDRM;
                        goto out_rcu_wakeup;
                }
                rcu_read_unlock();
 
                if (nsems > SEMMSL_FAST) {
-                       sem_io = ipc_alloc(sizeof(ushort)*nsems);
+                       sem_io = kvmalloc_array(nsems, sizeof(ushort),
+                                               GFP_KERNEL);
                        if (sem_io == NULL) {
-                               ipc_rcu_putref(sma, sem_rcu_free);
+                               ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                                return -ENOMEM;
                        }
                }
 
                if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
-                       ipc_rcu_putref(sma, sem_rcu_free);
+                       ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                        err = -EFAULT;
                        goto out_free;
                }
 
                for (i = 0; i < nsems; i++) {
                        if (sem_io[i] > SEMVMX) {
-                               ipc_rcu_putref(sma, sem_rcu_free);
+                               ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                                err = -ERANGE;
                                goto out_free;
                        }
@@ -1450,8 +1445,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                }
 
                for (i = 0; i < nsems; i++) {
-                       sma->sem_base[i].semval = sem_io[i];
-                       sma->sem_base[i].sempid = task_tgid_vnr(current);
+                       sma->sems[i].semval = sem_io[i];
+                       sma->sems[i].sempid = task_tgid_vnr(current);
                }
 
                ipc_assert_locked_object(&sma->sem_perm);
@@ -1476,7 +1471,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                err = -EIDRM;
                goto out_unlock;
        }
-       curr = &sma->sem_base[semnum];
+       curr = &sma->sems[semnum];
 
        switch (cmd) {
        case GETVAL:
@@ -1500,7 +1495,7 @@ out_rcu_wakeup:
        wake_up_q(&wake_q);
 out_free:
        if (sem_io != fast_sem_io)
-               ipc_free(sem_io);
+               kvfree(sem_io);
        return err;
 }
 
@@ -1719,7 +1714,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
        }
 
        nsems = sma->sem_nsems;
-       if (!ipc_rcu_getref(sma)) {
+       if (!ipc_rcu_getref(&sma->sem_perm)) {
                rcu_read_unlock();
                un = ERR_PTR(-EIDRM);
                goto out;
@@ -1729,7 +1724,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
        /* step 2: allocate new undo structure */
        new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
        if (!new) {
-               ipc_rcu_putref(sma, sem_rcu_free);
+               ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
                return ERR_PTR(-ENOMEM);
        }
 
@@ -1932,7 +1927,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
         */
        if (nsops == 1) {
                struct sem *curr;
-               curr = &sma->sem_base[sops->sem_num];
+               curr = &sma->sems[sops->sem_num];
 
                if (alter) {
                        if (sma->complex_count) {
@@ -2146,7 +2141,7 @@ void exit_sem(struct task_struct *tsk)
 
                /* perform adjustments registered in un */
                for (i = 0; i < sma->sem_nsems; i++) {
-                       struct sem *semaphore = &sma->sem_base[i];
+                       struct sem *semaphore = &sma->sems[i];
                        if (un->semadj[i]) {
                                semaphore->semval += un->semadj[i];
                                /*
index f45c7959b26407f6b59559e653fdb0172fddf56c..28a444861a8f489fa46edca81878eedf0f55da7a 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -174,11 +174,12 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 
 static void shm_rcu_free(struct rcu_head *head)
 {
-       struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-       struct shmid_kernel *shp = ipc_rcu_to_struct(p);
-
+       struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
+                                                       rcu);
+       struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
+                                                       shm_perm);
        security_shm_free(shp);
-       ipc_rcu_free(head);
+       kvfree(shp);
 }
 
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
@@ -241,7 +242,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
                user_shm_unlock(i_size_read(file_inode(shm_file)),
                                shp->mlock_user);
        fput(shm_file);
-       ipc_rcu_putref(shp, shm_rcu_free);
+       ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 }
 
 /*
@@ -529,7 +530,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        struct file *file;
        char name[13];
-       int id;
        vm_flags_t acctflag = 0;
 
        if (size < SHMMIN || size > ns->shm_ctlmax)
@@ -542,8 +542,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
                        ns->shm_tot + numpages > ns->shm_ctlall)
                return -ENOSPC;
 
-       shp = ipc_rcu_alloc(sizeof(*shp));
-       if (!shp)
+       shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
+       if (unlikely(!shp))
                return -ENOMEM;
 
        shp->shm_perm.key = key;
@@ -553,7 +553,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        shp->shm_perm.security = NULL;
        error = security_shm_alloc(shp);
        if (error) {
-               ipc_rcu_putref(shp, ipc_rcu_free);
+               kvfree(shp);
                return error;
        }
 
@@ -598,11 +598,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        shp->shm_file = file;
        shp->shm_creator = current;
 
-       id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
-       if (id < 0) {
-               error = id;
+       error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
+       if (error < 0)
                goto no_id;
-       }
 
        list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
 
@@ -624,7 +622,7 @@ no_id:
                user_shm_unlock(size, shp->mlock_user);
        fput(file);
 no_file:
-       ipc_rcu_putref(shp, shm_rcu_free);
+       call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
        return error;
 }
 
index caec7b1bfaa335f3d6df017eb9b0ad24058edd9c..1a2cb02467ab5f33f309a4542620cbec8d18361a 100644 (file)
@@ -232,6 +232,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 
        idr_preload(GFP_KERNEL);
 
+       atomic_set(&new->refcount, 1);
        spin_lock_init(&new->lock);
        new->deleted = false;
        rcu_read_lock();
@@ -394,70 +395,18 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
        ipcp->deleted = true;
 }
 
-/**
- * ipc_alloc - allocate ipc space
- * @size: size desired
- *
- * Allocate memory from the appropriate pools and return a pointer to it.
- * NULL is returned if the allocation fails
- */
-void *ipc_alloc(int size)
-{
-       return kvmalloc(size, GFP_KERNEL);
-}
-
-/**
- * ipc_free - free ipc space
- * @ptr: pointer returned by ipc_alloc
- *
- * Free a block created with ipc_alloc().
- */
-void ipc_free(void *ptr)
+int ipc_rcu_getref(struct kern_ipc_perm *ptr)
 {
-       kvfree(ptr);
+       return atomic_inc_not_zero(&ptr->refcount);
 }
 
-/**
- * ipc_rcu_alloc - allocate ipc and rcu space
- * @size: size desired
- *
- * Allocate memory for the rcu header structure +  the object.
- * Returns the pointer to the object or NULL upon failure.
- */
-void *ipc_rcu_alloc(int size)
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+                       void (*func)(struct rcu_head *head))
 {
-       /*
-        * We prepend the allocation with the rcu struct
-        */
-       struct ipc_rcu *out = ipc_alloc(sizeof(struct ipc_rcu) + size);
-       if (unlikely(!out))
-               return NULL;
-       atomic_set(&out->refcount, 1);
-       return out + 1;
-}
-
-int ipc_rcu_getref(void *ptr)
-{
-       struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
-       return atomic_inc_not_zero(&p->refcount);
-}
-
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
-{
-       struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
-       if (!atomic_dec_and_test(&p->refcount))
+       if (!atomic_dec_and_test(&ptr->refcount))
                return;
 
-       call_rcu(&p->rcu, func);
-}
-
-void ipc_rcu_free(struct rcu_head *head)
-{
-       struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-
-       kvfree(p);
+       call_rcu(&ptr->rcu, func);
 }
 
 /**
index 60ddccca464dd2a9f54ebe50401cdd009b3084ca..c692010e6f0a394d1d796e33fda3b6b5e46ca16f 100644 (file)
@@ -47,13 +47,6 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
 static inline void shm_exit_ns(struct ipc_namespace *ns) { }
 #endif
 
-struct ipc_rcu {
-       struct rcu_head rcu;
-       atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
-#define ipc_rcu_to_struct(p)  ((void *)(p+1))
-
 /*
  * Structure that holds the parameters needed by the ipc operations
  * (see after)
@@ -114,22 +107,18 @@ void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
 /* must be called with ipcp locked */
 int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
 
-/* for rare, potentially huge allocations.
- * both function can sleep
- */
-void *ipc_alloc(int size);
-void ipc_free(void *ptr);
-
 /*
  * For allocation that need to be freed by RCU.
  * Objects are reference counted, they start with reference count 1.
  * getref increases the refcount, the putref call that reduces the recount
  * to 0 schedules the rcu destruction. Caller must guarantee locking.
+ *
+ * refcount is initialized by ipc_addid(), before that point call_rcu()
+ * must be used.
  */
-void *ipc_rcu_alloc(int size);
-int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
-void ipc_rcu_free(struct rcu_head *head);
+int ipc_rcu_getref(struct kern_ipc_perm *ptr);
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+                       void (*func)(struct rcu_head *head));
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
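An illustrative sketch of the lifecycle described above (the foo_* names are hypothetical; the real conversions are in ipc/msg.c, ipc/sem.c and ipc/shm.c earlier in this diff):

    struct foo_ipc {
            struct kern_ipc_perm foo_perm;  /* embeds the rcu head and refcount */
            int payload;
    };

    static void foo_rcu_free(struct rcu_head *head)
    {
            struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);

            kvfree(container_of(p, struct foo_ipc, foo_perm));
    }

    /* Before ipc_addid() has initialized the refcount:
     *         call_rcu(&foo->foo_perm.rcu, foo_rcu_free);
     * once ipc_addid() has succeeded:
     *         ipc_rcu_putref(&foo->foo_perm, foo_rcu_free);
     */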
index 72aa080f91f04bd9256f1df714186033294b19b9..4cb8e8b23c6ecbcfbf12fda4980a0e9957b94346 100644 (file)
@@ -82,7 +82,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
index fcbd568f1e95069e6f42a46cad5f5c739f273def..6db80fc0810b9270b0cec3d7cfa2cf9b5a3413f1 100644 (file)
 #include <asm/sections.h>
 
 /* vmcoreinfo stuff */
-static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+static unsigned char *vmcoreinfo_data;
+static size_t vmcoreinfo_size;
+u32 *vmcoreinfo_note;
+
+/* trusted vmcoreinfo, e.g. the safe copy made in the crash memory */
+static unsigned char *vmcoreinfo_data_safecopy;
 
 /*
  * parsing the "crashkernel" commandline
@@ -324,8 +326,23 @@ static void update_vmcoreinfo_note(void)
        final_note(buf);
 }
 
+void crash_update_vmcoreinfo_safecopy(void *ptr)
+{
+       if (ptr)
+               memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+
+       vmcoreinfo_data_safecopy = ptr;
+}
+
 void crash_save_vmcoreinfo(void)
 {
+       if (!vmcoreinfo_note)
+               return;
+
+       /* Use the safe copy to generate the vmcoreinfo note if we have one */
+       if (vmcoreinfo_data_safecopy)
+               vmcoreinfo_data = vmcoreinfo_data_safecopy;
+
        vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
        update_vmcoreinfo_note();
 }
@@ -340,7 +357,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
        r = vscnprintf(buf, sizeof(buf), fmt, args);
        va_end(args);
 
-       r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
+       r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
 
        memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
 
@@ -356,11 +373,26 @@ void __weak arch_crash_save_vmcoreinfo(void)
 
 phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
-       return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
+       return __pa(vmcoreinfo_note);
 }
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
+       vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+       if (!vmcoreinfo_data) {
+               pr_warn("Memory allocation for vmcoreinfo_data failed\n");
+               return -ENOMEM;
+       }
+
+       vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
+                                               GFP_KERNEL | __GFP_ZERO);
+       if (!vmcoreinfo_note) {
+               free_page((unsigned long)vmcoreinfo_data);
+               vmcoreinfo_data = NULL;
+               pr_warn("Memory allocation for vmcoreinfo_note failed\n");
+               return -ENOMEM;
+       }
+
        VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
        VMCOREINFO_PAGESIZE(PAGE_SIZE);
 
index 0f69a3e5281effb9185a71bd5f4fcd79f58a6140..17921b0390b4f91113bcf8c9ccac5c1225751460 100644 (file)
@@ -205,19 +205,17 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
        void *stack;
        int i;
 
-       local_irq_disable();
        for (i = 0; i < NR_CACHED_STACKS; i++) {
-               struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+               struct vm_struct *s;
+
+               s = this_cpu_xchg(cached_stacks[i], NULL);
 
                if (!s)
                        continue;
-               this_cpu_write(cached_stacks[i], NULL);
 
                tsk->stack_vm_area = s;
-               local_irq_enable();
                return s->addr;
        }
-       local_irq_enable();
 
        stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
                                     VMALLOC_START, VMALLOC_END,
@@ -245,19 +243,15 @@ static inline void free_thread_stack(struct task_struct *tsk)
 {
 #ifdef CONFIG_VMAP_STACK
        if (task_stack_vm_area(tsk)) {
-               unsigned long flags;
                int i;
 
-               local_irq_save(flags);
                for (i = 0; i < NR_CACHED_STACKS; i++) {
-                       if (this_cpu_read(cached_stacks[i]))
+                       if (this_cpu_cmpxchg(cached_stacks[i],
+                                       NULL, tsk->stack_vm_area) != NULL)
                                continue;
 
-                       this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
-                       local_irq_restore(flags);
                        return;
                }
-               local_irq_restore(flags);
 
                vfree_atomic(tsk->stack);
                return;
@@ -560,7 +554,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
        set_task_stack_end_magic(tsk);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
-       tsk->stack_canary = get_random_long();
+       tsk->stack_canary = get_random_canary();
 #endif
 
        /*
@@ -579,6 +573,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
        kcov_task_init(tsk);
 
+#ifdef CONFIG_FAULT_INJECTION
+       tsk->fail_nth = 0;
+#endif
+
        return tsk;
 
 free_stack:
index 3a47fa998fe07277c592f62de36f59dad0d09f8b..ea34ed8bb9529c7b7a2a38b0a4e050995b4ba97e 100644 (file)
 #include <linux/bug.h>
 #include <linux/err.h>
 #include <linux/kcmp.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/eventpoll.h>
+#include <linux/file.h>
 
 #include <asm/unistd.h>
 
@@ -94,6 +98,56 @@ static int kcmp_lock(struct mutex *m1, struct mutex *m2)
        return err;
 }
 
+#ifdef CONFIG_EPOLL
+static int kcmp_epoll_target(struct task_struct *task1,
+                            struct task_struct *task2,
+                            unsigned long idx1,
+                            struct kcmp_epoll_slot __user *uslot)
+{
+       struct file *filp, *filp_epoll, *filp_tgt;
+       struct kcmp_epoll_slot slot;
+       struct files_struct *files;
+
+       if (copy_from_user(&slot, uslot, sizeof(slot)))
+               return -EFAULT;
+
+       filp = get_file_raw_ptr(task1, idx1);
+       if (!filp)
+               return -EBADF;
+
+       files = get_files_struct(task2);
+       if (!files)
+               return -EBADF;
+
+       spin_lock(&files->file_lock);
+       filp_epoll = fcheck_files(files, slot.efd);
+       if (filp_epoll)
+               get_file(filp_epoll);
+       else
+               filp_tgt = ERR_PTR(-EBADF);
+       spin_unlock(&files->file_lock);
+       put_files_struct(files);
+
+       if (filp_epoll) {
+               filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+               fput(filp_epoll);
+       } else
+
+       if (IS_ERR(filp_tgt))
+               return PTR_ERR(filp_tgt);
+
+       return kcmp_ptr(filp, filp_tgt, KCMP_FILE);
+}
+#else
+static int kcmp_epoll_target(struct task_struct *task1,
+                            struct task_struct *task2,
+                            unsigned long idx1,
+                            struct kcmp_epoll_slot __user *uslot)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
 SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
                unsigned long, idx1, unsigned long, idx2)
 {
@@ -165,6 +219,9 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
                ret = -EOPNOTSUPP;
 #endif
                break;
+       case KCMP_EPOLL_TFD:
+               ret = kcmp_epoll_target(task1, task2, idx1, (void *)idx2);
+               break;
        default:
                ret = -EINVAL;
                break;
index 980936a90ee6ea0a9f83c195277a7c0705a8bbaa..e62ec4dc662060764acc1ad019056d277eeec6a8 100644 (file)
@@ -144,6 +144,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
        if (ret)
                goto out;
 
+       /*
+        * Some architectures (like S390) may touch the crash memory before
+        * machine_kexec_prepare(); we must copy the vmcoreinfo data after it.
+        */
+       ret = kimage_crash_copy_vmcoreinfo(image);
+       if (ret)
+               goto out;
+
        for (i = 0; i < nr_segments; i++) {
                ret = kimage_load_segment(image, &image->segment[i]);
                if (ret)
index 154ffb489b93de01631ed4810b5042ba6fa8f123..1ae7c41c33c19c54e4b08d33c0c59da78244efba 100644 (file)
@@ -482,6 +482,40 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
        return pages;
 }
 
+int kimage_crash_copy_vmcoreinfo(struct kimage *image)
+{
+       struct page *vmcoreinfo_page;
+       void *safecopy;
+
+       if (image->type != KEXEC_TYPE_CRASH)
+               return 0;
+
+       /*
+        * For kdump, allocate one vmcoreinfo safe copy from the
+        * crash memory. Since arch_kexec_protect_crashkres() runs
+        * after the kexec syscall, the copy is naturally protected
+        * from write (and even read) access under the kernel direct
+        * mapping. We still need to access it when a crash happens
+        * in order to generate the vmcoreinfo note, so we rely on
+        * vmap for that purpose.
+        */
+       vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
+       if (!vmcoreinfo_page) {
+               pr_warn("Could not allocate vmcoreinfo buffer\n");
+               return -ENOMEM;
+       }
+       safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+       if (!safecopy) {
+               pr_warn("Could not vmap vmcoreinfo buffer\n");
+               return -ENOMEM;
+       }
+
+       image->vmcoreinfo_data_copy = safecopy;
+       crash_update_vmcoreinfo_safecopy(safecopy);
+
+       return 0;
+}
+
 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
 {
        if (*image->entry != 0)
@@ -569,6 +603,11 @@ void kimage_free(struct kimage *image)
        if (!image)
                return;
 
+       if (image->vmcoreinfo_data_copy) {
+               crash_update_vmcoreinfo_safecopy(NULL);
+               vunmap(image->vmcoreinfo_data_copy);
+       }
+
        kimage_free_extra_pages(image);
        for_each_kimage_entry(image, ptr, entry) {
                if (entry & IND_INDIRECTION) {
index 766e7e4d3ad91d99a7b3cb90bc6f84511d76c72d..9f48f441229720b0e02434b2375b61d9c62b7706 100644 (file)
 #include <linux/vmalloc.h>
 #include "kexec_internal.h"
 
-/*
- * Declare these symbols weak so that if architecture provides a purgatory,
- * these will be overridden.
- */
-char __weak kexec_purgatory[0];
-size_t __weak kexec_purgatory_size = 0;
-
 static int kexec_calculate_store_digests(struct kimage *image);
 
 /* Architectures can provide this probe function */
@@ -298,6 +291,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
        if (ret)
                goto out;
 
+       /*
+        * Some architectures (like S390) may touch the crash memory before
+        * machine_kexec_prepare(); we must copy the vmcoreinfo data after it.
+        */
+       ret = kimage_crash_copy_vmcoreinfo(image);
+       if (ret)
+               goto out;
+
        ret = kexec_calculate_store_digests(image);
        if (ret)
                goto out;
index 799a8a4521870a6444818fef64c0ae1e2dfad671..50dfcb039a417eb62abfd85ffac6f91266c8f324 100644 (file)
@@ -17,6 +17,8 @@ extern struct mutex kexec_mutex;
 #ifdef CONFIG_KEXEC_FILE
 #include <linux/purgatory.h>
 void kimage_file_post_load_cleanup(struct kimage *image);
+extern char kexec_purgatory[];
+extern size_t kexec_purgatory_size;
 #else /* CONFIG_KEXEC_FILE */
 static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
 #endif /* CONFIG_KEXEC_FILE */
index df1a9aa602a083a9a97a5b0c8128a684f507f92d..46ba853656f63b7ba1a1e11171b8cdc935666c8d 100644 (file)
@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
 {
        phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
        return sprintf(buf, "%pa %x\n", &vmcore_base,
-                      (unsigned int)sizeof(vmcoreinfo_note));
+                       (unsigned int)VMCOREINFO_NOTE_SIZE);
 }
 KERNEL_ATTR_RO(vmcoreinfo);
 
index 4dfba1a76cc360f649cbe6f35b57cf9413f2f5e1..6648fbbb8157fc12703d02fa0fdc9ff85c527ac4 100644 (file)
@@ -174,11 +174,32 @@ extern int no_unaligned_warning;
 
 #ifdef CONFIG_PROC_SYSCTL
 
-#define SYSCTL_WRITES_LEGACY   -1
-#define SYSCTL_WRITES_WARN      0
-#define SYSCTL_WRITES_STRICT    1
+/**
+ * enum sysctl_writes_mode - supported sysctl write modes
+ *
+ * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
+ *     to be written, and multiple writes on the same sysctl file descriptor
+ *     will rewrite the sysctl value, regardless of file position. No warning
+ *     is issued when the initial position is not 0.
+ * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
+ *     not 0.
+ * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
+ *     file position 0 and the value must be fully contained in the buffer
+ *     sent to the write syscall. When dealing with strings, the file position
+ *     is respected but restricted to the max length of the buffer; anything
+ *     past the max length is ignored. Multiple writes will append
+ *     to the buffer.
+ *
+ * These write modes control how the current file position affects the
+ * behavior of updating sysctl values through the proc interface on each write.
+ */
+enum sysctl_writes_mode {
+       SYSCTL_WRITES_LEGACY            = -1,
+       SYSCTL_WRITES_WARN              = 0,
+       SYSCTL_WRITES_STRICT            = 1,
+};
 
-static int sysctl_writes_strict = SYSCTL_WRITES_STRICT;
+static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
 
 static int proc_do_cad_pid(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
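To illustrate how the new unsigned int handlers declared in this series are wired into a table, a hypothetical entry (the example_* names are not part of the patch) might look like this; proc_douintvec_minmax() takes its bounds from extra1/extra2 as unsigned ints:

    static unsigned int example_timeout = 30;
    static unsigned int example_timeout_min = 1;
    static unsigned int example_timeout_max = 300;

    static struct ctl_table example_table[] = {
            {
                    .procname       = "example_timeout",
                    .data           = &example_timeout,
                    .maxlen         = sizeof(unsigned int),
                    .mode           = 0644,
                    .proc_handler   = proc_douintvec_minmax,
                    .extra1         = &example_timeout_min,
                    .extra2         = &example_timeout_max,
            },
            { }
    };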
@@ -879,6 +900,14 @@ static struct ctl_table kern_table[] = {
                .extra2         = &zero,
 #endif
        },
+       {
+               .procname       = "watchdog_cpumask",
+               .data           = &watchdog_cpumask_bits,
+               .maxlen         = NR_CPUS,
+               .mode           = 0644,
+               .proc_handler   = proc_watchdog_cpumask,
+       },
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
        {
                .procname       = "soft_watchdog",
                .data           = &soft_watchdog_enabled,
@@ -888,13 +917,6 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
-       {
-               .procname       = "watchdog_cpumask",
-               .data           = &watchdog_cpumask_bits,
-               .maxlen         = NR_CPUS,
-               .mode           = 0644,
-               .proc_handler   = proc_watchdog_cpumask,
-       },
        {
                .procname       = "softlockup_panic",
                .data           = &softlockup_panic,
@@ -904,27 +926,29 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_SMP
        {
-               .procname       = "hardlockup_panic",
-               .data           = &hardlockup_panic,
+               .procname       = "softlockup_all_cpu_backtrace",
+               .data           = &sysctl_softlockup_all_cpu_backtrace,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &zero,
                .extra2         = &one,
        },
+#endif /* CONFIG_SMP */
 #endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
        {
-               .procname       = "softlockup_all_cpu_backtrace",
-               .data           = &sysctl_softlockup_all_cpu_backtrace,
+               .procname       = "hardlockup_panic",
+               .data           = &hardlockup_panic,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &zero,
                .extra2         = &one,
        },
+#ifdef CONFIG_SMP
        {
                .procname       = "hardlockup_all_cpu_backtrace",
                .data           = &sysctl_hardlockup_all_cpu_backtrace,
@@ -936,6 +960,8 @@ static struct ctl_table kern_table[] = {
        },
 #endif /* CONFIG_SMP */
 #endif
+#endif
+
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
        {
                .procname       = "unknown_nmi_panic",
@@ -1949,6 +1975,32 @@ static void warn_sysctl_write(struct ctl_table *table)
                current->comm, table->procname);
 }
 
+/**
+ * proc_first_pos_non_zero_ignore - check if the first position is allowed
+ * @ppos: file position
+ * @table: the sysctl table
+ *
+ * Returns true if the first position is non-zero and the sysctl_writes_strict
+ * mode indicates this is not allowed for numeric input types. String proc
+ * handlers can ignore the return value.
+ */
+static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
+                                          struct ctl_table *table)
+{
+       if (!*ppos)
+               return false;
+
+       switch (sysctl_writes_strict) {
+       case SYSCTL_WRITES_STRICT:
+               return true;
+       case SYSCTL_WRITES_WARN:
+               warn_sysctl_write(table);
+               return false;
+       default:
+               return false;
+       }
+}
+
 /**
  * proc_dostring - read a string sysctl
  * @table: the sysctl table
@@ -1969,8 +2021,8 @@ static void warn_sysctl_write(struct ctl_table *table)
 int proc_dostring(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
-               warn_sysctl_write(table);
+       if (write)
+               proc_first_pos_non_zero_ignore(ppos, table);
 
        return _proc_do_string((char *)(table->data), table->maxlen, write,
                               (char __user *)buffer, lenp, ppos);
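A userspace sketch of what the strict mode means for numeric entries (the sysctl path is only an example): after the first write the file position is non-zero, so a second write on the same descriptor is not applied; the helpers above bail out early in that case.

    #include <fcntl.h>
    #include <unistd.h>

    static void strict_write_demo(void)
    {
            int fd = open("/proc/sys/kernel/panic", O_WRONLY);

            write(fd, "1", 1);      /* offset 0: value is applied          */
            write(fd, "2", 1);      /* offset 1: ignored under STRICT mode */
            close(fd);
    }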
@@ -2128,19 +2180,18 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
        return 0;
 }
 
-static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
-                                int *valp,
-                                int write, void *data)
+static int do_proc_douintvec_conv(unsigned long *lvalp,
+                                 unsigned int *valp,
+                                 int write, void *data)
 {
        if (write) {
-               if (*negp)
+               if (*lvalp > UINT_MAX)
                        return -EINVAL;
                if (*lvalp > UINT_MAX)
                        return -EINVAL;
                *valp = *lvalp;
        } else {
                unsigned int val = *valp;
-               *negp = false;
                *lvalp = (unsigned long)val;
        }
        return 0;
@@ -2172,17 +2223,8 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
                conv = do_proc_dointvec_conv;
 
        if (write) {
-               if (*ppos) {
-                       switch (sysctl_writes_strict) {
-                       case SYSCTL_WRITES_STRICT:
-                               goto out;
-                       case SYSCTL_WRITES_WARN:
-                               warn_sysctl_write(table);
-                               break;
-                       default:
-                               break;
-                       }
-               }
+               if (proc_first_pos_non_zero_ignore(ppos, table))
+                       goto out;
 
                if (left > PAGE_SIZE - 1)
                        left = PAGE_SIZE - 1;
@@ -2249,6 +2291,146 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
                        buffer, lenp, ppos, conv, data);
 }
 
+static int do_proc_douintvec_w(unsigned int *tbl_data,
+                              struct ctl_table *table,
+                              void __user *buffer,
+                              size_t *lenp, loff_t *ppos,
+                              int (*conv)(unsigned long *lvalp,
+                                          unsigned int *valp,
+                                          int write, void *data),
+                              void *data)
+{
+       unsigned long lval;
+       int err = 0;
+       size_t left;
+       bool neg;
+       char *kbuf = NULL, *p;
+
+       left = *lenp;
+
+       if (proc_first_pos_non_zero_ignore(ppos, table))
+               goto bail_early;
+
+       if (left > PAGE_SIZE - 1)
+               left = PAGE_SIZE - 1;
+
+       p = kbuf = memdup_user_nul(buffer, left);
+       if (IS_ERR(kbuf))
+               return -EINVAL;
+
+       left -= proc_skip_spaces(&p);
+       if (!left) {
+               err = -EINVAL;
+               goto out_free;
+       }
+
+       err = proc_get_long(&p, &left, &lval, &neg,
+                            proc_wspace_sep,
+                            sizeof(proc_wspace_sep), NULL);
+       if (err || neg) {
+               err = -EINVAL;
+               goto out_free;
+       }
+
+       if (conv(&lval, tbl_data, 1, data)) {
+               err = -EINVAL;
+               goto out_free;
+       }
+
+       if (!err && left)
+               left -= proc_skip_spaces(&p);
+
+out_free:
+       kfree(kbuf);
+       if (err)
+               return -EINVAL;
+
+       return 0;
+
+       /* This is in keeping with old __do_proc_dointvec() */
+bail_early:
+       *ppos += *lenp;
+       return err;
+}
+
+static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
+                              size_t *lenp, loff_t *ppos,
+                              int (*conv)(unsigned long *lvalp,
+                                          unsigned int *valp,
+                                          int write, void *data),
+                              void *data)
+{
+       unsigned long lval;
+       int err = 0;
+       size_t left;
+
+       left = *lenp;
+
+       if (conv(&lval, tbl_data, 0, data)) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       err = proc_put_long(&buffer, &left, lval, false);
+       if (err || !left)
+               goto out;
+
+       err = proc_put_char(&buffer, &left, '\n');
+
+out:
+       *lenp -= left;
+       *ppos += *lenp;
+
+       return err;
+}
+
+static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
+                              int write, void __user *buffer,
+                              size_t *lenp, loff_t *ppos,
+                              int (*conv)(unsigned long *lvalp,
+                                          unsigned int *valp,
+                                          int write, void *data),
+                              void *data)
+{
+       unsigned int *i, vleft;
+
+       if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
+               *lenp = 0;
+               return 0;
+       }
+
+       i = (unsigned int *) tbl_data;
+       vleft = table->maxlen / sizeof(*i);
+
+       /*
+        * Arrays are not supported, keep this simple. *Do not* add
+        * support for them.
+        */
+       if (vleft != 1) {
+               *lenp = 0;
+               return -EINVAL;
+       }
+
+       if (!conv)
+               conv = do_proc_douintvec_conv;
+
+       if (write)
+               return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
+                                          conv, data);
+       return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
+}
+
+static int do_proc_douintvec(struct ctl_table *table, int write,
+                            void __user *buffer, size_t *lenp, loff_t *ppos,
+                            int (*conv)(unsigned long *lvalp,
+                                        unsigned int *valp,
+                                        int write, void *data),
+                            void *data)
+{
+       return __do_proc_douintvec(table->data, table, write,
+                                  buffer, lenp, ppos, conv, data);
+}
+
 /**
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
@@ -2284,8 +2466,8 @@ int proc_dointvec(struct ctl_table *table, int write,
 int proc_douintvec(struct ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       return do_proc_dointvec(table, write, buffer, lenp, ppos,
-                               do_proc_douintvec_conv, NULL);
+       return do_proc_douintvec(table, write, buffer, lenp, ppos,
+                                do_proc_douintvec_conv, NULL);
 }
 
 /*
@@ -2390,6 +2572,65 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
                                do_proc_dointvec_minmax_conv, &param);
 }
 
+struct do_proc_douintvec_minmax_conv_param {
+       unsigned int *min;
+       unsigned int *max;
+};
+
+static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
+                                        unsigned int *valp,
+                                        int write, void *data)
+{
+       struct do_proc_douintvec_minmax_conv_param *param = data;
+
+       if (write) {
+               unsigned int val = *lvalp;
+
+               if ((param->min && *param->min > val) ||
+                   (param->max && *param->max < val))
+                       return -ERANGE;
+
+               if (*lvalp > UINT_MAX)
+                       return -EINVAL;
+               *valp = val;
+       } else {
+               unsigned int val = *valp;
+               *lvalp = (unsigned long) val;
+       }
+
+       return 0;
+}
+
+/**
+ * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string. Negative
+ * strings are not allowed.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max). There is a final sanity
+ * check for UINT_MAX to avoid having to support wrap around uses from
+ * userspace.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+                         void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct do_proc_douintvec_minmax_conv_param param = {
+               .min = (unsigned int *) table->extra1,
+               .max = (unsigned int *) table->extra2,
+       };
+       return do_proc_douintvec(table, write, buffer, lenp, ppos,
+                                do_proc_douintvec_minmax_conv, &param);
+}
+
 static void validate_coredump_safety(void)
 {
 #ifdef CONFIG_COREDUMP
@@ -2447,17 +2688,8 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
        left = *lenp;
 
        if (write) {
-               if (*ppos) {
-                       switch (sysctl_writes_strict) {
-                       case SYSCTL_WRITES_STRICT:
-                               goto out;
-                       case SYSCTL_WRITES_WARN:
-                               warn_sysctl_write(table);
-                               break;
-                       default:
-                               break;
-                       }
-               }
+               if (proc_first_pos_non_zero_ignore(ppos, table))
+                       goto out;
 
                if (left > PAGE_SIZE - 1)
                        left = PAGE_SIZE - 1;
@@ -2898,6 +3130,12 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
        return -ENOSYS;
 }
 
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+                         void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return -ENOSYS;
+}
+
 int proc_dointvec_jiffies(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -2940,6 +3178,7 @@ EXPORT_SYMBOL(proc_dointvec);
 EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
 EXPORT_SYMBOL(proc_dostring);
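
For illustration only (not part of this patch): a minimal sketch of how a driver could wire the new proc_douintvec_minmax() handler into a ctl_table entry, with the bounds supplied as unsigned ints through extra1/extra2. The example_* names are hypothetical; the table would be registered with register_sysctl() like any other ctl_table.

	#include <linux/sysctl.h>

	/* hypothetical knob, clamped to [0, 100] by proc_douintvec_minmax() */
	static unsigned int example_uint = 10;
	static unsigned int example_uint_min;		/* 0 */
	static unsigned int example_uint_max = 100;

	static struct ctl_table example_table[] = {
		{
			.procname	= "example_uint",
			.data		= &example_uint,
			.maxlen		= sizeof(unsigned int),
			.mode		= 0644,
			.proc_handler	= proc_douintvec_minmax,
			.extra1		= &example_uint_min,
			.extra2		= &example_uint_max,
		},
		{ }
	};
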
index 939a158eab11d2b2cca4f8412f0423449f964efd..02e1859f2ca82e086f0ac73b08c4df07766edede 100644 (file)
@@ -1346,7 +1346,7 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
         * CTL_KERN/KERN_VERSION is used by older glibc and cannot
         * ever go away.
         */
-       if (name[0] == CTL_KERN && name[1] == KERN_VERSION)
+       if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
                return;
 
        if (printk_ratelimit()) {
index 03e0b69bb5bfd6d2cbbf23fb0b5ed18ef1e6492d..cabe3e9fb620f6cf3ff557d1b6957bd358a2d474 100644 (file)
 #include <linux/kvm_para.h>
 #include <linux/kthread.h>
 
+/* Watchdog configuration */
 static DEFINE_MUTEX(watchdog_proc_mutex);
 
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+int __read_mostly nmi_watchdog_enabled;
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
+                                               NMI_WATCHDOG_ENABLED;
 #else
 unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
 #endif
-int __read_mostly nmi_watchdog_enabled;
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+                       CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+       watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+       if (!strncmp(str, "panic", 5))
+               hardlockup_panic = 1;
+       else if (!strncmp(str, "nopanic", 7))
+               hardlockup_panic = 0;
+       else if (!strncmp(str, "0", 1))
+               watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+       else if (!strncmp(str, "1", 1))
+               watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+       return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 int __read_mostly soft_watchdog_enabled;
+#endif
+
 int __read_mostly watchdog_user_enabled;
 int __read_mostly watchdog_thresh = 10;
 
@@ -45,15 +88,9 @@ int __read_mostly watchdog_thresh = 10;
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
 #endif
-static struct cpumask watchdog_cpumask __read_mostly;
+struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
-       for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
-
 /*
  * The 'watchdog_running' variable is set to 1 when the watchdog threads
  * are registered/started and is set to 0 when the watchdog threads are
@@ -72,7 +109,47 @@ static int __read_mostly watchdog_running;
  * of 'watchdog_running' cannot change while the watchdog is deactivated
  * temporarily (see related code in 'proc' handlers).
  */
-static int __read_mostly watchdog_suspended;
+int __read_mostly watchdog_suspended;
+
+/*
+ * These functions can be overridden if an architecture implements its
+ * own hardlockup detector.
+ *
+ * watchdog_nmi_enable/disable can be implemented to start and stop when
+ * softlockup watchdog threads start and stop. The arch must select the
+ * SOFTLOCKUP_DETECTOR Kconfig.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+       return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
+
+/*
+ * watchdog_nmi_reconfigure can be implemented to be notified after any
+ * watchdog configuration change. The arch hardlockup watchdog should
+ * respond to the following variables:
+ * - nmi_watchdog_enabled
+ * - watchdog_thresh
+ * - watchdog_cpumask
+ * - sysctl_hardlockup_all_cpu_backtrace
+ * - hardlockup_panic
+ * - watchdog_suspended
+ */
+void __weak watchdog_nmi_reconfigure(void)
+{
+}
+
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+       for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
 
 static u64 __read_mostly sample_period;
 
@@ -120,6 +197,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
        return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 {
        sysctl_hardlockup_all_cpu_backtrace =
@@ -128,6 +206,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 }
 __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
 #endif
+#endif
 
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -213,18 +292,6 @@ void touch_softlockup_watchdog_sync(void)
        __this_cpu_write(watchdog_touch_ts, 0);
 }
 
-/* watchdog detector functions */
-bool is_hardlockup(void)
-{
-       unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
-       if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
-               return true;
-
-       __this_cpu_write(hrtimer_interrupts_saved, hrint);
-       return false;
-}
-
 static int is_softlockup(unsigned long touch_ts)
 {
        unsigned long now = get_timestamp();
@@ -237,21 +304,21 @@ static int is_softlockup(unsigned long touch_ts)
        return 0;
 }
 
-static void watchdog_interrupt_count(void)
+/* watchdog detector functions */
+bool is_hardlockup(void)
 {
-       __this_cpu_inc(hrtimer_interrupts);
-}
+       unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
-/*
- * These two functions are mostly architecture specific
- * defining them as weak here.
- */
-int __weak watchdog_nmi_enable(unsigned int cpu)
-{
-       return 0;
+       if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+               return true;
+
+       __this_cpu_write(hrtimer_interrupts_saved, hrint);
+       return false;
 }
-void __weak watchdog_nmi_disable(unsigned int cpu)
+
+static void watchdog_interrupt_count(void)
 {
+       __this_cpu_inc(hrtimer_interrupts);
 }
 
 static int watchdog_enable_all_cpus(void);
@@ -502,57 +569,6 @@ static void watchdog_unpark_threads(void)
                kthread_unpark(per_cpu(softlockup_watchdog, cpu));
 }
 
-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
- */
-int lockup_detector_suspend(void)
-{
-       int ret = 0;
-
-       get_online_cpus();
-       mutex_lock(&watchdog_proc_mutex);
-       /*
-        * Multiple suspend requests can be active in parallel (counted by
-        * the 'watchdog_suspended' variable). If the watchdog threads are
-        * running, the first caller takes care that they will be parked.
-        * The state of 'watchdog_running' cannot change while a suspend
-        * request is active (see related code in 'proc' handlers).
-        */
-       if (watchdog_running && !watchdog_suspended)
-               ret = watchdog_park_threads();
-
-       if (ret == 0)
-               watchdog_suspended++;
-       else {
-               watchdog_disable_all_cpus();
-               pr_err("Failed to suspend lockup detectors, disabled\n");
-               watchdog_enabled = 0;
-       }
-
-       mutex_unlock(&watchdog_proc_mutex);
-
-       return ret;
-}
-
-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
- */
-void lockup_detector_resume(void)
-{
-       mutex_lock(&watchdog_proc_mutex);
-
-       watchdog_suspended--;
-       /*
-        * The watchdog threads are unparked if they were previously running
-        * and if there is no more active suspend request.
-        */
-       if (watchdog_running && !watchdog_suspended)
-               watchdog_unpark_threads();
-
-       mutex_unlock(&watchdog_proc_mutex);
-       put_online_cpus();
-}
-
 static int update_watchdog_all_cpus(void)
 {
        int ret;
@@ -604,6 +620,100 @@ static void watchdog_disable_all_cpus(void)
        }
 }
 
+#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+       return smpboot_update_cpumask_percpu_thread(
+                   &watchdog_threads, &watchdog_cpumask);
+}
+#endif
+
+#else /* SOFTLOCKUP */
+static int watchdog_park_threads(void)
+{
+       return 0;
+}
+
+static void watchdog_unpark_threads(void)
+{
+}
+
+static int watchdog_enable_all_cpus(void)
+{
+       return 0;
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+}
+
+#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+       return 0;
+}
+#endif
+
+static void set_sample_period(void)
+{
+}
+#endif /* SOFTLOCKUP */
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ */
+int lockup_detector_suspend(void)
+{
+       int ret = 0;
+
+       get_online_cpus();
+       mutex_lock(&watchdog_proc_mutex);
+       /*
+        * Multiple suspend requests can be active in parallel (counted by
+        * the 'watchdog_suspended' variable). If the watchdog threads are
+        * running, the first caller takes care that they will be parked.
+        * The state of 'watchdog_running' cannot change while a suspend
+        * request is active (see related code in 'proc' handlers).
+        */
+       if (watchdog_running && !watchdog_suspended)
+               ret = watchdog_park_threads();
+
+       if (ret == 0)
+               watchdog_suspended++;
+       else {
+               watchdog_disable_all_cpus();
+               pr_err("Failed to suspend lockup detectors, disabled\n");
+               watchdog_enabled = 0;
+       }
+
+       watchdog_nmi_reconfigure();
+
+       mutex_unlock(&watchdog_proc_mutex);
+
+       return ret;
+}
+
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ */
+void lockup_detector_resume(void)
+{
+       mutex_lock(&watchdog_proc_mutex);
+
+       watchdog_suspended--;
+       /*
+        * The watchdog threads are unparked if they were previously running
+        * and if there is no more active suspend request.
+        */
+       if (watchdog_running && !watchdog_suspended)
+               watchdog_unpark_threads();
+
+       watchdog_nmi_reconfigure();
+
+       mutex_unlock(&watchdog_proc_mutex);
+       put_online_cpus();
+}
+
 #ifdef CONFIG_SYSCTL
 
 /*
@@ -625,6 +735,8 @@ static int proc_watchdog_update(void)
        else
                watchdog_disable_all_cpus();
 
+       watchdog_nmi_reconfigure();
+
        return err;
 
 }
@@ -810,10 +922,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
                         * a temporary cpumask, so we are likely not in a
                         * position to do much else to make things better.
                         */
-                       if (smpboot_update_cpumask_percpu_thread(
-                                   &watchdog_threads, &watchdog_cpumask) != 0)
+                       if (watchdog_update_cpus() != 0)
                                pr_err("cpumask update failed\n");
                }
+
+               watchdog_nmi_reconfigure();
        }
 out:
        mutex_unlock(&watchdog_proc_mutex);
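
For illustration only (not part of this patch): a rough sketch of how an architecture with its own NMI-based hard lockup detector might fill in the weak watchdog_nmi_* hooks introduced above. The arch_* helpers are hypothetical placeholders; only the hook signatures come from the patch.

	#include <linux/nmi.h>

	/* hypothetical arch-side helpers, shown only to illustrate the hook contract */
	extern int arch_start_nmi_watchdog(unsigned int cpu);
	extern void arch_stop_nmi_watchdog(unsigned int cpu);
	extern void arch_apply_nmi_watchdog_config(void);

	int watchdog_nmi_enable(unsigned int cpu)
	{
		return arch_start_nmi_watchdog(cpu);
	}

	void watchdog_nmi_disable(unsigned int cpu)
	{
		arch_stop_nmi_watchdog(cpu);
	}

	void watchdog_nmi_reconfigure(void)
	{
		/* re-read nmi_watchdog_enabled, watchdog_thresh, watchdog_cpumask, ... */
		arch_apply_nmi_watchdog_config();
	}
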
index 54a427d1f344543947867ea3a5a2b3c6e8274d47..295a0d84934cb1d3a9a87abd4a8ac0f1d38656a5 100644 (file)
@@ -22,41 +22,9 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-unsigned int __read_mostly hardlockup_panic =
-                       CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
 static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
-       watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
-       if (!strncmp(str, "panic", 5))
-               hardlockup_panic = 1;
-       else if (!strncmp(str, "nopanic", 7))
-               hardlockup_panic = 0;
-       else if (!strncmp(str, "0", 1))
-               watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-       else if (!strncmp(str, "1", 1))
-               watchdog_enabled |= NMI_WATCHDOG_ENABLED;
-       return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
        /*
         * Using __raw here because some code paths have
@@ -66,9 +34,8 @@ void touch_nmi_watchdog(void)
         * going off.
         */
        raw_cpu_write(watchdog_nmi_touch, true);
-       touch_softlockup_watchdog();
 }
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 static struct perf_event_attr wd_hw_attr = {
        .type           = PERF_TYPE_HARDWARE,
index e20fc079bebd6e80239deb858ba508205c70a92b..b0d01c6d4e03ad4d36a5ca9f0559944f94070e85 100644 (file)
@@ -778,34 +778,45 @@ config DEBUG_SHIRQ
 menu "Debug Lockups and Hangs"
 
 config LOCKUP_DETECTOR
-       bool "Detect Hard and Soft Lockups"
+       bool
+
+config SOFTLOCKUP_DETECTOR
+       bool "Detect Soft Lockups"
        depends on DEBUG_KERNEL && !S390
+       select LOCKUP_DETECTOR
        help
          Say Y here to enable the kernel to act as a watchdog to detect
-         hard and soft lockups.
+         soft lockups.
 
          Softlockups are bugs that cause the kernel to loop in kernel
          mode for more than 20 seconds, without giving other tasks a
          chance to run.  The current stack trace is displayed upon
          detection and the system will stay locked up.
 
+config HARDLOCKUP_DETECTOR_PERF
+       bool
+       select SOFTLOCKUP_DETECTOR
+
+#
+# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
+# lockup detector rather than the perf based detector.
+#
+config HARDLOCKUP_DETECTOR
+       bool "Detect Hard Lockups"
+       depends on DEBUG_KERNEL && !S390
+       depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
+       select LOCKUP_DETECTOR
+       select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
+       select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
+       help
+         Say Y here to enable the kernel to act as a watchdog to detect
+         hard lockups.
+
          Hardlockups are bugs that cause the CPU to loop in kernel mode
          for more than 10 seconds, without letting other interrupts have a
          chance to run.  The current stack trace is displayed upon detection
          and the system will stay locked up.
 
-         The overhead should be minimal.  A periodic hrtimer runs to
-         generate interrupts and kick the watchdog task every 4 seconds.
-         An NMI is generated every 10 seconds or so to check for hardlockups.
-
-         The frequency of hrtimer and NMI events and the soft and hard lockup
-         thresholds can be controlled through the sysctl watchdog_thresh.
-
-config HARDLOCKUP_DETECTOR
-       def_bool y
-       depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
-       depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
-
 config BOOTPARAM_HARDLOCKUP_PANIC
        bool "Panic (Reboot) On Hard Lockups"
        depends on HARDLOCKUP_DETECTOR
@@ -826,7 +837,7 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
        bool "Panic (Reboot) On Soft Lockups"
-       depends on LOCKUP_DETECTOR
+       depends on SOFTLOCKUP_DETECTOR
        help
          Say Y here to enable the kernel to panic on "soft lockups",
          which are bugs that cause the kernel to loop in kernel
@@ -843,7 +854,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
 
 config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
        int
-       depends on LOCKUP_DETECTOR
+       depends on SOFTLOCKUP_DETECTOR
        range 0 1
        default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
        default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -851,7 +862,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 config DETECT_HUNG_TASK
        bool "Detect Hung Tasks"
        depends on DEBUG_KERNEL
-       default LOCKUP_DETECTOR
+       default SOFTLOCKUP_DETECTOR
        help
          Say Y here to enable the kernel to detect "hung tasks",
          which are bugs that cause the task to be stuck in
@@ -1785,6 +1796,17 @@ config TEST_FIRMWARE
 
          If unsure, say N.
 
+config TEST_SYSCTL
+       tristate "sysctl test driver"
+       default n
+       depends on PROC_SYSCTL
+       help
+         This builds the "test_sysctl" module, which allows the proc sysctl
+         interfaces available to drivers to be tested safely, without
+         affecting production knobs that might alter system functionality.
+
+         If unsure, say N.
+
 config TEST_UDELAY
        tristate "udelay test driver"
        default n
index 5a008329324e701c6476f2e9cb4fec2832ec4212..85e91e51a9fed28f5d79a198857c162c918cd860 100644 (file)
@@ -46,6 +46,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
 obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
index 4ff157159a0d4d9d746fad5d228e878bdecd5be2..09ac73c177fd555b86ef8b3a155ee1043258b532 100644 (file)
@@ -107,6 +107,12 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
 
 bool should_fail(struct fault_attr *attr, ssize_t size)
 {
+       if (in_task() && current->fail_nth) {
+               if (--current->fail_nth == 0)
+                       goto fail;
+               return false;
+       }
+
        /* No need to check any other properties if the probability is 0 */
        if (attr->probability == 0)
                return false;
@@ -134,6 +140,7 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
        if (!fail_stacktrace(attr))
                return false;
 
+fail:
        fail_dump(attr);
 
        if (atomic_read(&attr->times) != -1)
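
For illustration only (not part of this patch): the new current->fail_nth check forces the Nth fault-injection site hit by a task to fail. A userspace sketch, assuming the per-task /proc/self/task/<tid>/fail-nth knob added elsewhere in this series:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		char path[64];
		int fd;

		snprintf(path, sizeof(path), "/proc/self/task/%ld/fail-nth",
			 (long)syscall(SYS_gettid));
		fd = open(path, O_WRONLY);
		if (fd < 0)
			return 1;
		/* fail the first should_fail() check this thread hits */
		write(fd, "1", 1);
		close(fd);
		return 0;
	}
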
index 8ee7e5ec21be23f658323a9cea840e3d7fb41ce6..3bf4a9984f4cb094b7d74550be1897c20ac292a6 100644 (file)
@@ -72,6 +72,13 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 }
 EXPORT_SYMBOL(percpu_counter_set);
 
+/**
+ * This function is both preempt and irq safe. The former is due to explicit
+ * preemption disable. The latter is guaranteed by the fact that the slow path
+ * is explicitly protected by an irq-safe spinlock whereas the fast path uses
+ * this_cpu_add which is irq-safe by definition. Hence there is no need to muck
+ * with irq state before calling this one.
+ */
 void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
        s64 count;
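
For illustration only (not part of this patch): a caller relying on the preempt/irq safety documented above, so no local_irq_save()/restore is needed around the update. The example_* names are hypothetical.

	#include <linux/percpu_counter.h>

	static struct percpu_counter example_events;	/* hypothetical counter */

	/* safe from process or irq context; a batch of 64 keeps the spinlock cold */
	static void example_account_events(s64 nr)
	{
		percpu_counter_add_batch(&example_events, nr, 64);
	}
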
index 1c1fc9187b054ccae0d55f6678a23971de109718..ebbb99c775bdab12e6036a85bed06d7521cf94b4 100644 (file)
@@ -978,3 +978,10 @@ char *strreplace(char *s, char old, char new)
        return s;
 }
 EXPORT_SYMBOL(strreplace);
+
+void fortify_panic(const char *name)
+{
+       pr_emerg("detected buffer overflow in %s\n", name);
+       BUG();
+}
+EXPORT_SYMBOL(fortify_panic);
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
new file mode 100644 (file)
index 0000000..3dd801c
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * proc sysctl test driver
+ *
+ * Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or at your option any
+ * later version; or, when distributed separately from the Linux kernel or
+ * when incorporated into other software packages, subject to the following
+ * license:
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of copyleft-next (version 0.3.1 or later) as published
+ * at http://copyleft-next.org/.
+ */
+
+/*
+ * This module provides an interface to the proc sysctl interfaces.  This
+ * driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the
+ * system unless explicitly requested by name. You can also build this driver
+ * into your kernel.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/async.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+
+static int i_zero;
+static int i_one_hundred = 100;
+
+struct test_sysctl_data {
+       int int_0001;
+       int int_0002;
+       int int_0003[4];
+
+       unsigned int uint_0001;
+
+       char string_0001[65];
+};
+
+static struct test_sysctl_data test_data = {
+       .int_0001 = 60,
+       .int_0002 = 1,
+
+       .int_0003[0] = 0,
+       .int_0003[1] = 1,
+       .int_0003[2] = 2,
+       .int_0003[3] = 3,
+
+       .uint_0001 = 314,
+
+       .string_0001 = "(none)",
+};
+
+/* These are all under /proc/sys/debug/test_sysctl/ */
+static struct ctl_table test_table[] = {
+       {
+               .procname       = "int_0001",
+               .data           = &test_data.int_0001,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &i_zero,
+               .extra2         = &i_one_hundred,
+       },
+       {
+               .procname       = "int_0002",
+               .data           = &test_data.int_0002,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "int_0003",
+               .data           = &test_data.int_0003,
+               .maxlen         = sizeof(test_data.int_0003),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "uint_0001",
+               .data           = &test_data.uint_0001,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec,
+       },
+       {
+               .procname       = "string_0001",
+               .data           = &test_data.string_0001,
+               .maxlen         = sizeof(test_data.string_0001),
+               .mode           = 0644,
+               .proc_handler   = proc_dostring,
+       },
+       { }
+};
+
+static struct ctl_table test_sysctl_table[] = {
+       {
+               .procname       = "test_sysctl",
+               .maxlen         = 0,
+               .mode           = 0555,
+               .child          = test_table,
+       },
+       { }
+};
+
+static struct ctl_table test_sysctl_root_table[] = {
+       {
+               .procname       = "debug",
+               .maxlen         = 0,
+               .mode           = 0555,
+               .child          = test_sysctl_table,
+       },
+       { }
+};
+
+static struct ctl_table_header *test_sysctl_header;
+
+static int __init test_sysctl_init(void)
+{
+       test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
+       if (!test_sysctl_header)
+               return -ENOMEM;
+       return 0;
+}
+late_initcall(test_sysctl_init);
+
+static void __exit test_sysctl_exit(void)
+{
+       if (test_sysctl_header)
+               unregister_sysctl_table(test_sysctl_header);
+}
+
+module_exit(test_sysctl_exit);
+
+MODULE_AUTHOR("Luis R. Rodriguez <mcgrof@kernel.org>");
+MODULE_LICENSE("GPL");
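
For illustration only (not part of this patch): once the module is loaded, the knobs registered above appear under /proc/sys/debug/test_sysctl/. A userspace sketch exercising the unsigned int handler:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[32];
		ssize_t n;
		int fd = open("/proc/sys/debug/test_sysctl/uint_0001", O_RDWR);

		if (fd < 0)
			return 1;
		write(fd, "4294967295", 10);	/* UINT_MAX is accepted, negatives are not */
		lseek(fd, 0, SEEK_SET);
		n = read(fd, buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			printf("uint_0001 = %s", buf);
		}
		close(fd);
		return 0;
	}
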
index 1e516520433dc72437819267ec1be5a1770b1dd6..bc48ee783dd9e1e31f03a4b0393ef2902ea00a6e 100644 (file)
@@ -1384,7 +1384,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 
        page = __alloc_pages_node(nid,
                htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
-                                               __GFP_REPEAT|__GFP_NOWARN,
+                                               __GFP_RETRY_MAYFAIL|__GFP_NOWARN,
                huge_page_order(h));
        if (page) {
                prep_new_huge_page(h, page, nid);
@@ -1525,7 +1525,7 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
 {
        int order = huge_page_order(h);
 
-       gfp_mask |= __GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+       gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
        if (nid == NUMA_NO_NODE)
                nid = numa_mem_id();
        return __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
index 0e4f558412fb195c0b2bd997fbc0ac1b9c1e4566..24d88f0847059ebc028ce9fc9eabce90caae40b9 100644 (file)
@@ -23,7 +23,7 @@
  * hints such as HIGHMEM usage.
  */
 #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-                       __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
+                       __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
                        __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
                        __GFP_ATOMIC)
 
index cbb57194687e393a16e9e0e74381bfcb2569b720..0e517be91a89e162bb868af9835c8641aadfd01b 100644 (file)
@@ -3591,7 +3591,7 @@ out:
        return 0;
 }
 
-static int create_huge_pmd(struct vm_fault *vmf)
+static inline int create_huge_pmd(struct vm_fault *vmf)
 {
        if (vma_is_anonymous(vmf->vma))
                return do_huge_pmd_anonymous_page(vmf);
index 7d8e56214ac099ad7c0a1eb8201d344a008428f1..d911fa5cb2a73fe464042a59e1c2676c1129239d 100644 (file)
@@ -1078,7 +1078,8 @@ static struct page *new_page(struct page *page, unsigned long start, int **x)
        /*
         * if !vma, alloc_page_vma() will use task or system default policy
         */
-       return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+       return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL,
+                       vma, address);
 }
 #else
 
index 0b60cc7ddac2b1a63ce71ed17c02a64cb6370e1c..96e93b214d317baf4fb4ffbb5fcbe726e110980d 100644 (file)
@@ -601,7 +601,7 @@ static inline void __wb_writeout_inc(struct bdi_writeback *wb)
 {
        struct wb_domain *cgdom;
 
-       __inc_wb_stat(wb, WB_WRITTEN);
+       inc_wb_stat(wb, WB_WRITTEN);
        wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
                               wb->bdi->max_prop_frac);
 
@@ -2435,8 +2435,8 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
                __inc_lruvec_page_state(page, NR_FILE_DIRTY);
                __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
                __inc_node_page_state(page, NR_DIRTIED);
-               __inc_wb_stat(wb, WB_RECLAIMABLE);
-               __inc_wb_stat(wb, WB_DIRTIED);
+               inc_wb_stat(wb, WB_RECLAIMABLE);
+               inc_wb_stat(wb, WB_DIRTIED);
                task_io_account_write(PAGE_SIZE);
                current->nr_dirtied++;
                this_cpu_inc(bdp_ratelimits);
@@ -2741,7 +2741,7 @@ int test_clear_page_writeback(struct page *page)
                        if (bdi_cap_account_writeback(bdi)) {
                                struct bdi_writeback *wb = inode_to_wb(inode);
 
-                               __dec_wb_stat(wb, WB_WRITEBACK);
+                               dec_wb_stat(wb, WB_WRITEBACK);
                                __wb_writeout_inc(wb);
                        }
                }
@@ -2786,7 +2786,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
                                                page_index(page),
                                                PAGECACHE_TAG_WRITEBACK);
                        if (bdi_cap_account_writeback(bdi))
-                               __inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+                               inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
 
                        /*
                         * We can come through here when swapping anonymous
index 64b7d82a9b1abeeae0881eb45d71217871538e2b..6d30e914afb6c1b9ecc77b33ee5e01fc7a0ed6b3 100644 (file)
@@ -3284,6 +3284,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        /* The OOM killer will not help higher order allocs */
        if (order > PAGE_ALLOC_COSTLY_ORDER)
                goto out;
+       /*
+        * We have already exhausted all our reclaim opportunities without any
+        * success so it is time to admit defeat. We will skip the OOM killer
+        * because it is very likely that the caller has a more reasonable
+        * fallback than shooting a random task.
+        */
+       if (gfp_mask & __GFP_RETRY_MAYFAIL)
+               goto out;
        /* The OOM killer does not needlessly kill tasks for lowmem */
        if (ac->high_zoneidx < ZONE_NORMAL)
                goto out;
@@ -3413,7 +3421,7 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
        }
 
        /*
-        * !costly requests are much more important than __GFP_REPEAT
+        * !costly requests are much more important than __GFP_RETRY_MAYFAIL
         * costly ones because they are de facto nofail and invoke OOM
         * killer to move on while costly can fail and users are ready
         * to cope with that. 1/4 retries is rather arbitrary but we
@@ -3920,9 +3928,9 @@ retry:
 
        /*
         * Do not retry costly high order allocations unless they are
-        * __GFP_REPEAT
+        * __GFP_RETRY_MAYFAIL
         */
-       if (costly_order && !(gfp_mask & __GFP_REPEAT))
+       if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
                goto nopage;
 
        if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
index a56c3989f77312085f31124f7705908a5f69609a..c50b1a14d55ec0f36ae718f602216dd24d591a6b 100644 (file)
@@ -56,11 +56,11 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 
                if (node_state(node, N_HIGH_MEMORY))
                        page = alloc_pages_node(
-                               node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+                               node, GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
                                get_order(size));
                else
                        page = alloc_pages(
-                               GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+                               GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
                                get_order(size));
                if (page)
                        return page_address(page);
index 26be6407abd7efe452a585d341a8d7e5b53d2b32..ee250e2cde344ef586a65661f8c51271cfe6e648 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -339,9 +339,9 @@ EXPORT_SYMBOL(vm_mmap);
  * Uses kmalloc to get the memory but if the allocation fails then falls back
  * to the vmalloc allocator. Use kvfree for freeing the memory.
  *
- * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT
- * is supported only for large (>32kB) allocations, and it should be used only if
- * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks.
+ * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
+ * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
+ * preferable to the vmalloc fallback, due to visible performance drawbacks.
  *
  * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people.
  */
@@ -366,13 +366,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
        if (size > PAGE_SIZE) {
                kmalloc_flags |= __GFP_NOWARN;
 
-               /*
-                * We have to override __GFP_REPEAT by __GFP_NORETRY for !costly
-                * requests because there is no other way to tell the allocator
-                * that we want to fail rather than retry endlessly.
-                */
-               if (!(kmalloc_flags & __GFP_REPEAT) ||
-                               (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+               if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
                        kmalloc_flags |= __GFP_NORETRY;
        }
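
For illustration only (not part of this patch): with the kvmalloc_node() documentation updated above, a large best-effort allocation can pass __GFP_RETRY_MAYFAIL to try hard yet fail cleanly instead of triggering the OOM killer, mirroring the net/core/dev.c conversions further below. The example_* name is hypothetical.

	#include <linux/mm.h>
	#include <linux/types.h>

	static u64 *example_alloc_table(size_t nr_entries)
	{
		/* try hard, but return NULL rather than invoke the OOM killer */
		return kvzalloc(nr_entries * sizeof(u64),
				GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	}
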
 
index 6016ab079e2bd0923affc208b09ee44ccb4e7f5a..8698c1c86c4dbed685269eae1ecded2f5e714368 100644 (file)
@@ -1795,7 +1795,7 @@ fail:
  *     allocator with @gfp_mask flags.  Map them into contiguous
  *     kernel virtual space, using a pagetable protection of @prot.
  *
- *     Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT
+ *     Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
  *     and __GFP_NOFAIL are not supported
  *
  *     Any use of gfp flags outside of GFP_KERNEL should be consulted
index e9210f825219c4ec944b747a84656c5e8f5fd007..a1af041930a6b0a4ff3646cde896af327ce503a3 100644 (file)
@@ -2506,18 +2506,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
                return false;
 
        /* Consider stopping depending on scan and reclaim activity */
-       if (sc->gfp_mask & __GFP_REPEAT) {
+       if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
                /*
-                * For __GFP_REPEAT allocations, stop reclaiming if the
+                * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
                 * full LRU list has been scanned and we are still failing
                 * to reclaim pages. This full LRU scan is potentially
-                * expensive but a __GFP_REPEAT caller really wants to succeed
+                * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
                 */
                if (!nr_reclaimed && !nr_scanned)
                        return false;
        } else {
                /*
-                * For non-__GFP_REPEAT allocations which can presumably
+                * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
                 * fail without consequence, stop if we failed to reclaim
                 * any pages from the last SWAP_CLUSTER_MAX number of
                 * pages that were scanned. This will return to the
index 02440518dd69eafb1594981749678ce78c2c57e0..8515f8fe0460ae08e08e269a47524e0738714626 100644 (file)
@@ -7384,7 +7384,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
        BUG_ON(count < 1);
 
-       rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+       rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
        if (!rx)
                return -ENOMEM;
 
@@ -7424,7 +7424,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
        if (count < 1 || count > 0xffff)
                return -EINVAL;
 
-       tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+       tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
        if (!tx)
                return -ENOMEM;
 
@@ -7965,7 +7965,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        /* ensure 32-byte alignment of whole construct */
        alloc_size += NETDEV_ALIGN - 1;
 
-       p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT);
+       p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
        if (!p)
                return NULL;
 
index 8b11341ed69ad97d34dd3e9b73c8c44ef7c452ff..f990eb8b30a9c4a57ef39d34413dd2f2a75babb6 100644 (file)
@@ -4747,7 +4747,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 
        gfp_head = gfp_mask;
        if (gfp_head & __GFP_DIRECT_RECLAIM)
-               gfp_head |= __GFP_REPEAT;
+               gfp_head |= __GFP_RETRY_MAYFAIL;
 
        *errcode = -ENOBUFS;
        skb = alloc_skb(header_len, gfp_head);
index 1770c1d9b37fc14be9d9d8cf4721d21c745826ad..e1648238a9c99d234d77ab1711d18db9d49926bd 100644 (file)
@@ -1003,14 +1003,10 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
        if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
                return NULL;
 
-       if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
-               info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
-       if (!info) {
-               info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
-                                PAGE_KERNEL);
-               if (!info)
-                       return NULL;
-       }
+       info = kvmalloc(sz, GFP_KERNEL);
+       if (!info)
+               return NULL;
+
        memset(info, 0, sizeof(*info));
        info->size = size;
        return info;
index 147fde73a0f566e8f6a26718adf176ef3943afa0..263d16e3219e6d747496619fe5fedd03c22ba44f 100644 (file)
@@ -648,7 +648,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
                return 0;
 
        /* If XPS was setup, we can allocate memory on right NUMA node */
-       array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT,
+       array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL,
                              netdev_queue_numa_node_read(sch->dev_queue));
        if (!array)
                return -ENOMEM;
index aa243db93f01fb88e32ad6fdd990f11034a4b18f..be0d4a5fdf53629d1ceeb38dc5953f619277f8e4 100644 (file)
@@ -75,8 +75,8 @@ static int __init example_init(void)
        for (i = 0; i < nents; i++) {
                printk(KERN_INFO
                "sg[%d] -> "
-               "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
-                       i, sg[i].page_link, sg[i].offset, sg[i].length);
+               "page %p offset 0x%.8x length 0x%.8x\n",
+                       i, sg_page(&sg[i]), sg[i].offset, sg[i].length);
 
                if (sg_is_last(&sg[i]))
                        break;
@@ -104,8 +104,8 @@ static int __init example_init(void)
        for (i = 0; i < nents; i++) {
                printk(KERN_INFO
                "sg[%d] -> "
-               "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
-                       i, sg[i].page_link, sg[i].offset, sg[i].length);
+               "page %p offset 0x%.8x length 0x%.8x\n",
+                       i, sg_page(&sg[i]), sg[i].offset, sg[i].length);
 
                if (sg_is_last(&sg[i]))
                        break;
index 8f940c09918f0e6c8971535030f92fccb0f073d5..2287a0bca863bf57ffabaae9de58aab0174826c3 100755 (executable)
@@ -5576,10 +5576,18 @@ sub process {
                            "architecture specific defines should be avoided\n" .  $herecurr);
                }
 
+# check that the storage class is not after a type
+               if ($line =~ /\b($Type)\s+($Storage)\b/) {
+                       WARN("STORAGE_CLASS",
+                            "storage class '$2' should be located before type '$1'\n" . $herecurr);
+               }
 # Check that the storage class is at the beginning of a declaration
-               if ($line =~ /\b$Storage\b/ && $line !~ /^.\s*$Storage\b/) {
+               if ($line =~ /\b$Storage\b/ &&
+                   $line !~ /^.\s*$Storage/ &&
+                   $line =~ /^.\s*(.+?)\$Storage\s/ &&
+                   $1 !~ /[\,\)]\s*$/) {
                        WARN("STORAGE_CLASS",
-                            "storage class should be at the beginning of the declaration\n" . $herecurr)
+                            "storage class should be at the beginning of the declaration\n" . $herecurr);
                }
 
 # check the location of the inline attribute, that it is between
index 7986f4e0da123a240eeca854666dd3cfac86d69b..7aad82406422576d6c5a5d6ae3455cea41f93713 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/fs.h>
 #include <linux/mount.h>
+#include <linux/of_fdt.h>
 
 /* We need to stringify expanded macros so that they can be parsed */
 
@@ -50,3 +51,9 @@ LX_VALUE(MNT_NOEXEC)
 LX_VALUE(MNT_NOATIME)
 LX_VALUE(MNT_NODIRATIME)
 LX_VALUE(MNT_RELATIME)
+
+/* linux/of_fdt.h> */
+LX_VALUE(OF_DT_HEADER)
+
+/* Kernel Configs */
+LX_CONFIG(CONFIG_OF)
index 5afd1098e33a173a18b2310aa24d205534df711c..6d2e09a2ad2f9204dbdb7c5c84522c45530e1342 100644 (file)
@@ -12,6 +12,7 @@
 #
 
 import gdb
+import sys
 
 from linux import utils
 
@@ -24,7 +25,7 @@ class LxDmesg(gdb.Command):
 
     def invoke(self, arg, from_tty):
         log_buf_addr = int(str(gdb.parse_and_eval(
-            "'printk.c'::log_buf")).split()[0], 16)
+            "(void *)'printk.c'::log_buf")).split()[0], 16)
         log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx"))
         log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx"))
         log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len"))
@@ -52,13 +53,19 @@ class LxDmesg(gdb.Command):
                 continue
 
             text_len = utils.read_u16(log_buf[pos + 10:pos + 12])
-            text = log_buf[pos + 16:pos + 16 + text_len].decode()
+            text = log_buf[pos + 16:pos + 16 + text_len].decode(
+                encoding='utf8', errors='replace')
             time_stamp = utils.read_u64(log_buf[pos:pos + 8])
 
             for line in text.splitlines():
-                gdb.write("[{time:12.6f}] {line}\n".format(
+                msg = u"[{time:12.6f}] {line}\n".format(
                     time=time_stamp / 1000000000.0,
-                    line=line))
+                    line=line)
+                # With python2 gdb.write will attempt to convert unicode to
+                # ascii and might fail so pass an utf8-encoded str instead.
+                if sys.hexversion < 0x03000000:
+                    msg = msg.encode(encoding='utf8', errors='replace')
+                gdb.write(msg)
 
             pos += length
 
index 38b1f09d1cd95ecd13558d72214775c68c20f7d6..086d27223c0cf3451105fad63fd5cb46e4f3bc7d 100644 (file)
@@ -16,6 +16,7 @@ from linux import constants
 from linux import utils
 from linux import tasks
 from linux import lists
+from struct import *
 
 
 class LxCmdLine(gdb.Command):
@@ -195,3 +196,75 @@ values of that process namespace"""
                         info_opts(MNT_INFO, m_flags)))
 
 LxMounts()
+
+
+class LxFdtDump(gdb.Command):
+    """Output Flattened Device Tree header and dump FDT blob to the filename
+       specified as the command argument. Equivalent to
+       'cat /proc/fdt > fdtdump.dtb' on a running target"""
+
+    def __init__(self):
+        super(LxFdtDump, self).__init__("lx-fdtdump", gdb.COMMAND_DATA,
+                                        gdb.COMPLETE_FILENAME)
+
+    def fdthdr_to_cpu(self, fdt_header):
+
+        fdt_header_be = ">IIIIIII"
+        fdt_header_le = "<IIIIIII"
+
+        if utils.get_target_endianness() == 1:
+            output_fmt = fdt_header_le
+        else:
+            output_fmt = fdt_header_be
+
+        return unpack(output_fmt, pack(fdt_header_be,
+                                       fdt_header['magic'],
+                                       fdt_header['totalsize'],
+                                       fdt_header['off_dt_struct'],
+                                       fdt_header['off_dt_strings'],
+                                       fdt_header['off_mem_rsvmap'],
+                                       fdt_header['version'],
+                                       fdt_header['last_comp_version']))
+
+    def invoke(self, arg, from_tty):
+
+        if not constants.LX_CONFIG_OF:
+            raise gdb.GdbError("Kernel not compiled with CONFIG_OF\n")
+
+        if len(arg) == 0:
+            filename = "fdtdump.dtb"
+        else:
+            filename = arg
+
+        py_fdt_header_ptr = gdb.parse_and_eval(
+            "(const struct fdt_header *) initial_boot_params")
+        py_fdt_header = py_fdt_header_ptr.dereference()
+
+        fdt_header = self.fdthdr_to_cpu(py_fdt_header)
+
+        if fdt_header[0] != constants.LX_OF_DT_HEADER:
+            raise gdb.GdbError("No flattened device tree magic found\n")
+
+        gdb.write("fdt_magic:         0x{:02X}\n".format(fdt_header[0]))
+        gdb.write("fdt_totalsize:     0x{:02X}\n".format(fdt_header[1]))
+        gdb.write("off_dt_struct:     0x{:02X}\n".format(fdt_header[2]))
+        gdb.write("off_dt_strings:    0x{:02X}\n".format(fdt_header[3]))
+        gdb.write("off_mem_rsvmap:    0x{:02X}\n".format(fdt_header[4]))
+        gdb.write("version:           {}\n".format(fdt_header[5]))
+        gdb.write("last_comp_version: {}\n".format(fdt_header[6]))
+
+        inf = gdb.inferiors()[0]
+        fdt_buf = utils.read_memoryview(inf, py_fdt_header_ptr,
+                                        fdt_header[1]).tobytes()
+
+        try:
+            f = open(filename, 'wb')
+        except:
+            raise gdb.GdbError("Could not open file to dump fdt")
+
+        f.write(fdt_buf)
+        f.close()
+
+        gdb.write("Dumped fdt blob to " + filename + "\n")
+
+LxFdtDump()
index d540bfe7319035e9ae6de6dd63e3f404530efb6e..e8e449444e658be4a9190c6ea2de14cca8fc4890 100644 (file)
@@ -163,6 +163,13 @@ config HARDENED_USERCOPY_PAGESPAN
          been removed. This config is intended to be used only while
          trying to find such users.
 
+config FORTIFY_SOURCE
+       bool "Harden common str/mem functions against buffer overflows"
+       depends on ARCH_HAS_FORTIFY_SOURCE
+       help
+         Detect overflows of buffers in common string and memory functions
+         where the compiler can determine and validate the buffer sizes.
+
 config STATIC_USERMODEHELPER
        bool "Force all usermode helper calls through a single binary"
        help
index 0a8a1c45af87dc32ddaec90a29dcad3d3ecaec14..a1497c516d85aaabc9cd6de476268f7fc7a4bdca 100644 (file)
@@ -643,7 +643,7 @@ static const struct {
        { "__GFP_FS",                   "F" },
        { "__GFP_COLD",                 "CO" },
        { "__GFP_NOWARN",               "NWR" },
-       { "__GFP_REPEAT",               "R" },
+       { "__GFP_RETRY_MAYFAIL",        "R" },
        { "__GFP_NOFAIL",               "NF" },
        { "__GFP_NORETRY",              "NR" },
        { "__GFP_COMP",                 "C" },
index b3c33e071f10069ad01d0029c5d7c0350e413a18..95c320b354e816845aecca8c6df107e98c7bb600 100644 (file)
@@ -4,8 +4,7 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests".