Unfortunately the memory available for ISA DMA is scarce so unless you
allocate the memory during boot-up it's a good idea to also pass
-__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder.
+__GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder.
(This scarcity also means that you should allocate the buffer as
early as possible and not release it until the driver is unloaded.)
fail_futex=
mmc_core.fail_request=<interval>,<probability>,<space>,<times>
+o proc entries
+
+- /proc/self/task/<current-tid>/fail-nth:
+
+ Write to this file of integer N makes N-th call in the current task fail
+ (N is 0-based). Read from this file returns a single char 'Y' or 'N'
+ that says if the fault setup with a previous write to this file was
+ injected or not, and disables the fault if it wasn't yet injected.
+ Note that this file enables all types of faults (slab, futex, etc).
+ This setting takes precedence over all other generic debugfs settings
+ like probability, interval, times, etc. But per-capability settings
+ (e.g. fail_futex/ignore-private) take precedence over it.
+
+ This feature is intended for systematic testing of faults in a single
+ system call. See an example below.
+
How to add new fault injection capability
-----------------------------------------
# env FAILCMD_TYPE=fail_page_alloc \
./tools/testing/fault-injection/failcmd.sh --times=100 \
-- make -C tools/testing/selftests/ run_tests
+
+Systematic faults using fail-nth
+---------------------------------
+
+The following code systematically faults 0-th, 1-st, 2-nd and so on
+capabilities in the socketpair() system call.
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+
+int main()
+{
+ int i, err, res, fail_nth, fds[2];
+ char buf[128];
+
+ system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
+ sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
+ fail_nth = open(buf, O_RDWR);
+ for (i = 0;; i++) {
+ sprintf(buf, "%d", i);
+ write(fail_nth, buf, strlen(buf));
+ res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
+ err = errno;
+ read(fail_nth, buf, 1);
+ if (res == 0) {
+ close(fds[0]);
+ close(fds[1]);
+ }
+ printf("%d-th fault %c: res=%d/%d\n", i, buf[0], res, err);
+ if (buf[0] != 'Y')
+ break;
+ }
+ return 0;
+}
+
+An example output:
+
+0-th fault Y: res=-1/23
+1-th fault Y: res=-1/23
+2-th fault Y: res=-1/23
+3-th fault Y: res=-1/12
+4-th fault Y: res=-1/12
+5-th fault Y: res=-1/23
+6-th fault Y: res=-1/23
+7-th fault Y: res=-1/23
+8-th fault Y: res=-1/12
+9-th fault Y: res=-1/12
+10-th fault Y: res=-1/12
+11-th fault Y: res=-1/12
+12-th fault Y: res=-1/12
+13-th fault Y: res=-1/12
+14-th fault Y: res=-1/12
+15-th fault Y: res=-1/12
+16-th fault N: res=0/12
pos: 0
flags: 02
mnt_id: 9
- tfd: 5 events: 1d data: ffffffffffffffff
+ tfd: 5 events: 1d data: ffffffffffffffff pos:0 ino:61af sdev:7
where 'tfd' is a target file descriptor number in decimal form,
'events' is events mask being watched and the 'data' is data
associated with a target [see epoll(7) for more details].
+ The 'pos' is current offset of the target file in decimal form
+ [see lseek(2)], 'ino' and 'sdev' are inode and device numbers
+ where target file resides, all in hex format.
+
Fsnotify files
~~~~~~~~~~~~~~
For inotify files the format is the following
2) Or use the system kernel binary itself as dump-capture kernel and there is
no need to build a separate dump-capture kernel. This is possible
only with the architectures which support a relocatable kernel. As
- of today, i386, x86_64, ppc64, ia64 and arm architectures support relocatable
- kernel.
+ of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support
+ relocatable kernel.
Building a relocatable kernel is advantageous from the point of view that
one does not have to build a second kernel for capturing the dump. But
For arm64:
- Use vmlinux or Image
-If you are using a uncompressed vmlinux image then use following command
+If you are using an uncompressed vmlinux image then use following command
to load dump-capture kernel.
kexec -p <dump-capture-kernel-vmlinux-image> \
--dtb=<dtb-for-dump-capture-kernel> \
--append="root=<root-dev> <arch-specific-options>"
+If you are using an uncompressed Image, then use following command
+to load dump-capture kernel.
+
+ kexec -p <dump-capture-kernel-Image> \
+ --initrd=<initrd-for-dump-capture-kernel> \
+ --append="root=<root-dev> <arch-specific-options>"
Please note, that --args-linux does not need to be specified for ia64.
It is planned to make this a no-op on that architecture, but for now
config HAVE_NMI
bool
-config HAVE_NMI_WATCHDOG
- depends on HAVE_NMI
- bool
#
# An arch should select this if it provides all these things:
#
config GENERIC_IDLE_POLL_SETUP
bool
+config ARCH_HAS_FORTIFY_SOURCE
+ bool
+ help
+ An architecture should select this when it can successfully
+ build and run with CONFIG_FORTIFY_SOURCE.
+
# Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h
config ARCH_HAS_SET_MEMORY
bool
subsystem. Also has support for calculating CPU cycle events
to determine how many clock cycles in a given period.
+config HAVE_HARDLOCKUP_DETECTOR_PERF
+ bool
+ depends on HAVE_PERF_EVENTS_NMI
+ help
+ The arch chooses to use the generic perf-NMI-based hardlockup
+ detector. Must define HAVE_PERF_EVENTS_NMI.
+
+config HAVE_NMI_WATCHDOG
+ depends on HAVE_NMI
+ bool
+ help
+ The arch provides a low level NMI watchdog. It provides
+ asm/nmi.h, and defines its own arch_touch_nmi_watchdog().
+
+config HAVE_HARDLOCKUP_DETECTOR_ARCH
+ bool
+ select HAVE_NMI_WATCHDOG
+ help
+ The arch chooses to provide its own hardlockup detector, which is
+ a superset of the HAVE_NMI_WATCHDOG. It also conforms to config
+ interfaces and parameters provided by hardlockup detector subsystem.
+
config HAVE_PERF_REGS
bool
help
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
-void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp);
-void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp);
+asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
+asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
static inline bool __vfp_enabled(void)
{
return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10)));
void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt);
void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt);
-int asmlinkage __guest_enter(struct kvm_vcpu *vcpu,
+asmlinkage int __guest_enter(struct kvm_vcpu *vcpu,
struct kvm_cpu_context *host);
-int asmlinkage __hyp_do_panic(const char *, int, u32);
+asmlinkage int __hyp_do_panic(const char *, int, u32);
#endif /* __ARM_KVM_HYP_H__ */
* we need to reverse all data we receive from the mcu due to its physical location
* returns : 01110111 -> 11101110
*/
-u8 inline jornada_ssp_reverse(u8 byte)
+inline u8 jornada_ssp_reverse(u8 byte)
{
return
((0x80 & byte) >> 7) |
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_HAS_KCOV
/* Try to get a semi random initial value. */
get_random_bytes(&canary, sizeof(canary));
canary ^= LINUX_VERSION_CODE;
+ canary &= CANARY_MASK;
current->stack_canary = canary;
__stack_chk_guard = current->stack_canary;
#define memcpy(dst, src, len) __memcpy(dst, src, len)
#define memmove(dst, src, len) __memmove(dst, src, len)
#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
#endif
#endif
* Leave enough space between the mmap area and the stack to honour ulimit in
* the face of randomisation.
*/
-#define MIN_GAP (SZ_128M + ((STACK_RND_MASK << PAGE_SHIFT) + 1))
+#define MIN_GAP (SZ_128M)
#define MAX_GAP (STACK_TOP/6*5)
static int mmap_is_legacy(void)
static unsigned long mmap_base(unsigned long rnd)
{
unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
+
+ /* Values close to RLIM_INFINITY can overflow. */
+ if (gap + pad > gap)
+ gap += pad;
if (gap < MIN_GAP)
gap = MIN_GAP;
#include <linux/nmi.h>
+extern void arch_touch_nmi_watchdog(void);
+
#endif
}
device_initcall(init_nmi_wdt);
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
{
atomic_set(&nmi_touched[smp_processor_id()], 1);
}
/* Main device API. ioctl's to read/set/clear bits, as well as to
* set alarms to wait for using a subsequent select().
*/
-unsigned long inline setget_input(struct gpio_private *priv, unsigned long arg)
+inline unsigned long setget_input(struct gpio_private *priv, unsigned long arg)
{
/* Set direction 0=unchanged 1=input,
* return mask with 1=input */
return dir_g_in_bits;
} /* setget_input */
-unsigned long inline setget_output(struct gpio_private *priv, unsigned long arg)
+inline unsigned long setget_output(struct gpio_private *priv, unsigned long arg)
{
if (USE_PORTS(priv)) {
*priv->dir = *priv->dir_shadow |=
#ifdef CONFIG_MMU
#ifndef __ASSEMBLY__
-extern void asmlinkage __flush_tlb_all(void);
-extern void asmlinkage __flush_tlb_mm(unsigned long contextid);
-extern void asmlinkage __flush_tlb_page(unsigned long contextid, unsigned long start);
-extern void asmlinkage __flush_tlb_range(unsigned long contextid,
+extern asmlinkage void __flush_tlb_all(void);
+extern asmlinkage void __flush_tlb_mm(unsigned long contextid);
+extern asmlinkage void __flush_tlb_page(unsigned long contextid, unsigned long start);
+extern asmlinkage void __flush_tlb_range(unsigned long contextid,
unsigned long start, unsigned long end);
#endif /* !__ASSEMBLY__ */
#endif
}
-phys_addr_t paddr_vmcoreinfo_note(void)
-{
- return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
-}
-
ia64_mlogbuf_dump();
}
-static void inline
+static inline void
ia64_mca_spin(const char *func)
{
if (monarch_cpu == smp_processor_id())
/*
* Update the ate.
*/
-void inline
+inline void
ate_write(struct pcibus_info *pcibus_info, int ate_index, int count,
volatile u64 ate)
{
* All registers defined in struct tioce will meet that criteria.
*/
-static void inline
+static inline void
tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
{
u64 mmr_base;
}
}
-static void inline
+static inline void
tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr)
{
u64 mmr_base;
#define EINT7 67 /* EDGE Port interrupt 7 */
static unsigned int irqebitmap[] = { 0, 1, 4, 7 };
-static unsigned int inline irq2ebit(unsigned int irq)
+static inline unsigned int irq2ebit(unsigned int irq)
{
return irqebitmap[irq - EINT0];
}
#define EINT1 65 /* EDGE Port interrupt 1 */
#define EINT7 71 /* EDGE Port interrupt 7 */
-static unsigned int inline irq2ebit(unsigned int irq)
+static inline unsigned int irq2ebit(unsigned int irq)
{
return irq - EINT0;
}
{
pud_t *pud;
- pud = (pud_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PUD_ORDER);
+ pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
if (pud)
pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
return pud;
/* Mask of CPUs which are currently definitely operating coherently */
extern cpumask_t cpu_coherent_mask;
-extern void asmlinkage smp_bootstrap(void);
+extern asmlinkage void smp_bootstrap(void);
extern void calculate_cpu_foreign_map(void);
#ifndef _ASM_NMI_H
#define _ASM_NMI_H
+extern void arch_touch_nmi_watchdog(void);
+
#endif /* _ASM_NMI_H */
# we can't inline it)
#
###############################################################################
- .globl touch_nmi_watchdog
- .type touch_nmi_watchdog,@function
-touch_nmi_watchdog:
+ .globl arch_touch_nmi_watchdog
+ .type arch_touch_nmi_watchdog,@function
+arch_touch_nmi_watchdog:
clr d0
clr d1
mov watchdog_alert_counter, a0
lne
ret [],0
- .size touch_nmi_watchdog,.-touch_nmi_watchdog
+ .size arch_touch_nmi_watchdog,.-arch_touch_nmi_watchdog
static unsigned int watchdog_hz = 1;
unsigned int watchdog_alert_counter[NR_CPUS];
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
/*
* the best way to detect whether a CPU has a 'hard lockup' problem
config NMI_IPI
bool
- depends on SMP && (DEBUGGER || KEXEC_CORE)
+ depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR)
default y
config STACKTRACE_SUPPORT
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_SET_COHERENT_MASK
select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select ARCH_HAS_SG_CHAIN
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MOD_ARCH_SPECIFIC
- select HAVE_NMI if PERF_EVENTS
+ select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
+ select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S)
select HAVE_OPROFILE
select HAVE_OPTPROBES if PPC64
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI if PPC64
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if SMP
return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
#else
struct page *page;
- page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT),
+ page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_RETRY_MAYFAIL),
4);
if (!page)
return NULL;
#ifndef _ASM_NMI_H
#define _ASM_NMI_H
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+extern void arch_touch_nmi_watchdog(void);
+
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+ bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
+#else
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+
#endif /* _ASM_NMI_H */
int (*cpu_bootable)(unsigned int nr);
};
+extern void smp_flush_nmi_ipi(u64 delay_us);
+extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
extern void smp_send_debugger_break(void);
extern void start_secondary_resume(void);
extern void smp_generic_give_timebase(void);
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
obj-$(CONFIG_VDSO32) += vdso32/
+obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
EXC_VIRT_NONE(0x5800, 0x100)
#endif
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH)
+
+#define MASKED_DEC_HANDLER_LABEL 3f
+
+#define MASKED_DEC_HANDLER(_H) \
+3: /* soft-nmi */ \
+ std r12,PACA_EXGEN+EX_R12(r13); \
+ GET_SCRATCH0(r10); \
+ std r10,PACA_EXGEN+EX_R13(r13); \
+ EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
+
+EXC_COMMON_BEGIN(soft_nmi_common)
+ mr r10,r1
+ ld r1,PACAEMERGSP(r13)
+ ld r1,PACA_NMI_EMERG_SP(r13)
+ subi r1,r1,INT_FRAME_SIZE
+ EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
+ system_reset, soft_nmi_interrupt,
+ ADD_NVGPRS;ADD_RECONCILE)
+ b ret_from_except
+
+#else
+#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
+#define MASKED_DEC_HANDLER(_H)
+#endif
/*
* An interrupt came in while soft-disabled. We set paca->irq_happened, then:
lis r10,0x7fff; \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
- b 2f; \
+ b MASKED_DEC_HANDLER_LABEL; \
1: cmpwi r10,PACA_IRQ_DBELL; \
beq 2f; \
cmpwi r10,PACA_IRQ_HMI; \
ld r11,PACA_EXGEN+EX_R11(r13); \
GET_SCRATCH0(r13); \
##_H##rfid; \
- b .
+ b .; \
+ MASKED_DEC_HANDLER(_H)
/*
* Real mode exceptions actually use this too, but alternate
phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
phdr->p_offset = phdr->p_paddr;
- phdr->p_memsz = vmcoreinfo_max_size;
- phdr->p_filesz = vmcoreinfo_max_size;
+ phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
/* Increment number of program headers. */
(elf->e_phnum)++;
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
+#include <linux/nmi.h> /* hardlockup_detector_disable() */
#include <asm/reg.h>
#include <asm/sections.h>
static int __init kvm_guest_init(void)
{
+ /*
+ * The hardlockup detector is likely to get false positives in
+ * KVM guests, so disable it by default.
+ */
+ hardlockup_detector_disable();
+
if (!kvm_para_available())
goto free_tmp;
#undef DEBUG_PROM
+/* we cannot use FORTIFY as it brings in new symbols */
+#define __NO_FORTIFY
+
#include <stdarg.h>
#include <linux/kernel.h>
#include <linux/string.h>
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif
-
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-u64 hw_nmi_get_sample_period(int watchdog_thresh)
-{
- return ppc_proc_freq * watchdog_thresh;
-}
-
-/*
- * The hardlockup detector breaks PMU event based branches and is likely
- * to get false positives in KVM guests, so disable it by default.
- */
-static int __init disable_hardlockup_detector(void)
-{
- hardlockup_detector_disable();
-
- return 0;
-}
-early_initcall(disable_hardlockup_detector);
-#endif
}
}
+void smp_flush_nmi_ipi(u64 delay_us)
+{
+ unsigned long flags;
+
+ nmi_ipi_lock_start(&flags);
+ while (nmi_ipi_busy_count) {
+ nmi_ipi_unlock_end(&flags);
+ udelay(1);
+ if (delay_us) {
+ delay_us--;
+ if (!delay_us)
+ return;
+ }
+ nmi_ipi_lock_start(&flags);
+ }
+ nmi_ipi_unlock_end(&flags);
+}
+
/*
* - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
* - fn is the target callback function.
* - delay_us > 0 is the delay before giving up waiting for targets to
* enter the handler, == 0 specifies indefinite delay.
*/
-static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
{
unsigned long flags;
int me = raw_smp_processor_id();
--- /dev/null
+/*
+ * Watchdog support on powerpc systems.
+ *
+ * Copyright 2017, IBM Corporation.
+ *
+ * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
+ */
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/kdebug.h>
+#include <linux/sched/debug.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+
+#include <asm/paca.h>
+
+/*
+ * The watchdog has a simple timer that runs on each CPU, once per timer
+ * period. This is the heartbeat.
+ *
+ * Then there are checks to see if the heartbeat has not triggered on a CPU
+ * for the panic timeout period. Currently the watchdog only supports an
+ * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ *
+ * This is not an NMI watchdog, but Linux uses that name for a generic
+ * watchdog in some cases, so NMI gets used in some places.
+ */
+
+static cpumask_t wd_cpus_enabled __read_mostly;
+
+static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
+static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
+
+static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
+
+static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(u64, wd_timer_tb);
+
+/*
+ * These are for the SMP checker. CPUs clear their pending bit in their
+ * heartbeat. If the bitmask becomes empty, the time is noted and the
+ * bitmask is refilled.
+ *
+ * All CPUs clear their bit in the pending mask every timer period.
+ * Once all have cleared, the time is noted and the bits are reset.
+ * If the time since all clear was greater than the panic timeout,
+ * we can panic with the list of stuck CPUs.
+ *
+ * This will work best with NMI IPIs for crash code so the stuck CPUs
+ * can be pulled out to get their backtraces.
+ */
+static unsigned long __wd_smp_lock;
+static cpumask_t wd_smp_cpus_pending;
+static cpumask_t wd_smp_cpus_stuck;
+static u64 wd_smp_last_reset_tb;
+
+static inline void wd_smp_lock(unsigned long *flags)
+{
+ /*
+ * Avoid locking layers if possible.
+ * This may be called from low level interrupt handlers at some
+ * point in future.
+ */
+ local_irq_save(*flags);
+ while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
+ cpu_relax();
+}
+
+static inline void wd_smp_unlock(unsigned long *flags)
+{
+ clear_bit_unlock(0, &__wd_smp_lock);
+ local_irq_restore(*flags);
+}
+
+static void wd_lockup_ipi(struct pt_regs *regs)
+{
+ pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id());
+ print_modules();
+ print_irqtrace_events(current);
+ if (regs)
+ show_regs(regs);
+ else
+ dump_stack();
+
+ if (hardlockup_panic)
+ nmi_panic(regs, "Hard LOCKUP");
+}
+
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+ cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ wd_smp_last_reset_tb = tb;
+ cpumask_andnot(&wd_smp_cpus_pending,
+ &wd_cpus_enabled,
+ &wd_smp_cpus_stuck);
+ }
+}
+
+static void watchdog_smp_panic(int cpu, u64 tb)
+{
+ unsigned long flags;
+ int c;
+
+ wd_smp_lock(&flags);
+ /* Double check some things under lock */
+ if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
+ goto out;
+ if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
+ goto out;
+ if (cpumask_weight(&wd_smp_cpus_pending) == 0)
+ goto out;
+
+ pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
+ cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+
+ /*
+ * Try to trigger the stuck CPUs.
+ */
+ for_each_cpu(c, &wd_smp_cpus_pending) {
+ if (c == cpu)
+ continue;
+ smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ }
+ smp_flush_nmi_ipi(1000000);
+
+ /* Take the stuck CPU out of the watch group */
+ for_each_cpu(c, &wd_smp_cpus_pending)
+ set_cpu_stuck(c, tb);
+
+out:
+ wd_smp_unlock(&flags);
+
+ printk_safe_flush();
+ /*
+ * printk_safe_flush() seems to require another print
+ * before anything actually goes out to console.
+ */
+ if (sysctl_hardlockup_all_cpu_backtrace)
+ trigger_allbutself_cpu_backtrace();
+
+ if (hardlockup_panic)
+ nmi_panic(NULL, "Hard LOCKUP");
+}
+
+static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+{
+ if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
+ if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
+ unsigned long flags;
+
+ pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
+ wd_smp_lock(&flags);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
+ wd_smp_unlock(&flags);
+ }
+ return;
+ }
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ unsigned long flags;
+
+ wd_smp_lock(&flags);
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ wd_smp_last_reset_tb = tb;
+ cpumask_andnot(&wd_smp_cpus_pending,
+ &wd_cpus_enabled,
+ &wd_smp_cpus_stuck);
+ }
+ wd_smp_unlock(&flags);
+ }
+}
+
+static void watchdog_timer_interrupt(int cpu)
+{
+ u64 tb = get_tb();
+
+ per_cpu(wd_timer_tb, cpu) = tb;
+
+ wd_smp_clear_cpu_pending(cpu, tb);
+
+ if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
+ watchdog_smp_panic(cpu, tb);
+}
+
+void soft_nmi_interrupt(struct pt_regs *regs)
+{
+ unsigned long flags;
+ int cpu = raw_smp_processor_id();
+ u64 tb;
+
+ if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+ return;
+
+ nmi_enter();
+ tb = get_tb();
+ if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+ per_cpu(wd_timer_tb, cpu) = tb;
+
+ wd_smp_lock(&flags);
+ if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
+ wd_smp_unlock(&flags);
+ goto out;
+ }
+ set_cpu_stuck(cpu, tb);
+
+ pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
+ print_modules();
+ print_irqtrace_events(current);
+ if (regs)
+ show_regs(regs);
+ else
+ dump_stack();
+
+ wd_smp_unlock(&flags);
+
+ if (sysctl_hardlockup_all_cpu_backtrace)
+ trigger_allbutself_cpu_backtrace();
+
+ if (hardlockup_panic)
+ nmi_panic(regs, "Hard LOCKUP");
+ }
+ if (wd_panic_timeout_tb < 0x7fffffff)
+ mtspr(SPRN_DEC, wd_panic_timeout_tb);
+
+out:
+ nmi_exit();
+}
+
+static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
+{
+ t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
+ if (wd_timer_period_ms > 1000)
+ t->expires = __round_jiffies_up(t->expires, cpu);
+ add_timer_on(t, cpu);
+}
+
+static void wd_timer_fn(unsigned long data)
+{
+ struct timer_list *t = this_cpu_ptr(&wd_timer);
+ int cpu = smp_processor_id();
+
+ watchdog_timer_interrupt(cpu);
+
+ wd_timer_reset(cpu, t);
+}
+
+void arch_touch_nmi_watchdog(void)
+{
+ int cpu = smp_processor_id();
+
+ watchdog_timer_interrupt(cpu);
+}
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+
+static void start_watchdog_timer_on(unsigned int cpu)
+{
+ struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
+
+ per_cpu(wd_timer_tb, cpu) = get_tb();
+
+ setup_pinned_timer(t, wd_timer_fn, 0);
+ wd_timer_reset(cpu, t);
+}
+
+static void stop_watchdog_timer_on(unsigned int cpu)
+{
+ struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
+
+ del_timer_sync(t);
+}
+
+static int start_wd_on_cpu(unsigned int cpu)
+{
+ if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
+ WARN_ON(1);
+ return 0;
+ }
+
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ return 0;
+
+ if (watchdog_suspended)
+ return 0;
+
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return 0;
+
+ cpumask_set_cpu(cpu, &wd_cpus_enabled);
+ if (cpumask_weight(&wd_cpus_enabled) == 1) {
+ cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
+ wd_smp_last_reset_tb = get_tb();
+ }
+ smp_wmb();
+ start_watchdog_timer_on(cpu);
+
+ return 0;
+}
+
+static int stop_wd_on_cpu(unsigned int cpu)
+{
+ if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+ return 0; /* Can happen in CPU unplug case */
+
+ stop_watchdog_timer_on(cpu);
+
+ cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+ wd_smp_clear_cpu_pending(cpu, get_tb());
+
+ return 0;
+}
+
+static void watchdog_calc_timeouts(void)
+{
+ wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq;
+
+ /* Have the SMP detector trigger a bit later */
+ wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
+
+ /* 2/5 is the factor that the perf based detector uses */
+ wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
+}
+
+void watchdog_nmi_reconfigure(void)
+{
+ int cpu;
+
+ watchdog_calc_timeouts();
+
+ for_each_cpu(cpu, &wd_cpus_enabled)
+ stop_wd_on_cpu(cpu);
+
+ for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
+ start_wd_on_cpu(cpu);
+}
+
+/*
+ * This runs after lockup_detector_init() which sets up watchdog_cpumask.
+ */
+static int __init powerpc_watchdog_init(void)
+{
+ int err;
+
+ watchdog_calc_timeouts();
+
+ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
+ start_wd_on_cpu, stop_wd_on_cpu);
+ if (err < 0)
+ pr_warn("Watchdog could not be initialized");
+
+ return 0;
+}
+arch_initcall(powerpc_watchdog_init);
+
+static void handle_backtrace_ipi(struct pt_regs *regs)
+{
+ nmi_cpu_backtrace(regs);
+}
+
+static void raise_backtrace_ipi(cpumask_t *mask)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask) {
+ if (cpu == smp_processor_id())
+ handle_backtrace_ipi(NULL);
+ else
+ smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000);
+ }
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+ nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
+}
}
if (!hpt)
- hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
+ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
|__GFP_NOWARN, order - PAGE_SHIFT);
if (!hpt)
static void test_basic_patching(void)
{
- extern unsigned int ftr_fixup_test1;
- extern unsigned int end_ftr_fixup_test1;
- extern unsigned int ftr_fixup_test1_orig;
- extern unsigned int ftr_fixup_test1_expected;
- int size = &end_ftr_fixup_test1 - &ftr_fixup_test1;
+ extern unsigned int ftr_fixup_test1[];
+ extern unsigned int end_ftr_fixup_test1[];
+ extern unsigned int ftr_fixup_test1_orig[];
+ extern unsigned int ftr_fixup_test1_expected[];
+ int size = end_ftr_fixup_test1 - ftr_fixup_test1;
fixup.value = fixup.mask = 8;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2);
fixup.alt_start_off = fixup.alt_end_off = 0;
/* Sanity check */
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
/* Check we don't patch if the value matches */
patch_feature_section(8, &fixup);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
/* Check we do patch if the value doesn't match */
patch_feature_section(0, &fixup);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
/* Check we do patch if the mask doesn't match */
- memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
patch_feature_section(~8, &fixup);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
}
static void test_alternative_patching(void)
{
- extern unsigned int ftr_fixup_test2;
- extern unsigned int end_ftr_fixup_test2;
- extern unsigned int ftr_fixup_test2_orig;
- extern unsigned int ftr_fixup_test2_alt;
- extern unsigned int ftr_fixup_test2_expected;
- int size = &end_ftr_fixup_test2 - &ftr_fixup_test2;
+ extern unsigned int ftr_fixup_test2[];
+ extern unsigned int end_ftr_fixup_test2[];
+ extern unsigned int ftr_fixup_test2_orig[];
+ extern unsigned int ftr_fixup_test2_alt[];
+ extern unsigned int ftr_fixup_test2_expected[];
+ int size = end_ftr_fixup_test2 - ftr_fixup_test2;
fixup.value = fixup.mask = 0xF;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2);
- fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt);
- fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1);
/* Sanity check */
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
/* Check we don't patch if the value matches */
patch_feature_section(0xF, &fixup);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
/* Check we do patch if the value doesn't match */
patch_feature_section(0, &fixup);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
/* Check we do patch if the mask doesn't match */
- memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
patch_feature_section(~0xF, &fixup);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
}
static void test_alternative_case_too_big(void)
{
- extern unsigned int ftr_fixup_test3;
- extern unsigned int end_ftr_fixup_test3;
- extern unsigned int ftr_fixup_test3_orig;
- extern unsigned int ftr_fixup_test3_alt;
- int size = &end_ftr_fixup_test3 - &ftr_fixup_test3;
+ extern unsigned int ftr_fixup_test3[];
+ extern unsigned int end_ftr_fixup_test3[];
+ extern unsigned int ftr_fixup_test3_orig[];
+ extern unsigned int ftr_fixup_test3_alt[];
+ int size = end_ftr_fixup_test3 - ftr_fixup_test3;
fixup.value = fixup.mask = 0xC;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2);
- fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt);
- fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2);
/* Sanity check */
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
/* Expect nothing to be patched, and the error returned to us */
check(patch_feature_section(0xF, &fixup) == 1);
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
check(patch_feature_section(0, &fixup) == 1);
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
check(patch_feature_section(~0xF, &fixup) == 1);
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
}
static void test_alternative_case_too_small(void)
{
- extern unsigned int ftr_fixup_test4;
- extern unsigned int end_ftr_fixup_test4;
- extern unsigned int ftr_fixup_test4_orig;
- extern unsigned int ftr_fixup_test4_alt;
- extern unsigned int ftr_fixup_test4_expected;
- int size = &end_ftr_fixup_test4 - &ftr_fixup_test4;
+ extern unsigned int ftr_fixup_test4[];
+ extern unsigned int end_ftr_fixup_test4[];
+ extern unsigned int ftr_fixup_test4_orig[];
+ extern unsigned int ftr_fixup_test4_alt[];
+ extern unsigned int ftr_fixup_test4_expected[];
+ int size = end_ftr_fixup_test4 - ftr_fixup_test4;
unsigned long flag;
/* Check a high-bit flag */
flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
fixup.value = fixup.mask = flag;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5);
- fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt);
- fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2);
/* Sanity check */
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
/* Check we don't patch if the value matches */
patch_feature_section(flag, &fixup);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
/* Check we do patch if the value doesn't match */
patch_feature_section(0, &fixup);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
/* Check we do patch if the mask doesn't match */
- memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
patch_feature_section(~flag, &fixup);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
}
static void test_alternative_case_with_branch(void)
{
- extern unsigned int ftr_fixup_test5;
- extern unsigned int end_ftr_fixup_test5;
- extern unsigned int ftr_fixup_test5_expected;
- int size = &end_ftr_fixup_test5 - &ftr_fixup_test5;
+ extern unsigned int ftr_fixup_test5[];
+ extern unsigned int end_ftr_fixup_test5[];
+ extern unsigned int ftr_fixup_test5_expected[];
+ int size = end_ftr_fixup_test5 - ftr_fixup_test5;
- check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0);
+ check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
}
static void test_alternative_case_with_external_branch(void)
{
- extern unsigned int ftr_fixup_test6;
- extern unsigned int end_ftr_fixup_test6;
- extern unsigned int ftr_fixup_test6_expected;
- int size = &end_ftr_fixup_test6 - &ftr_fixup_test6;
+ extern unsigned int ftr_fixup_test6[];
+ extern unsigned int end_ftr_fixup_test6[];
+ extern unsigned int ftr_fixup_test6_expected[];
+ int size = end_ftr_fixup_test6 - ftr_fixup_test6;
- check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0);
+ check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
}
static void test_cpu_macros(void)
{
- extern u8 ftr_fixup_test_FTR_macros;
- extern u8 ftr_fixup_test_FTR_macros_expected;
- unsigned long size = &ftr_fixup_test_FTR_macros_expected -
- &ftr_fixup_test_FTR_macros;
+ extern u8 ftr_fixup_test_FTR_macros[];
+ extern u8 ftr_fixup_test_FTR_macros_expected[];
+ unsigned long size = ftr_fixup_test_FTR_macros_expected -
+ ftr_fixup_test_FTR_macros;
/* The fixups have already been done for us during boot */
- check(memcmp(&ftr_fixup_test_FTR_macros,
- &ftr_fixup_test_FTR_macros_expected, size) == 0);
+ check(memcmp(ftr_fixup_test_FTR_macros,
+ ftr_fixup_test_FTR_macros_expected, size) == 0);
}
static void test_fw_macros(void)
{
#ifdef CONFIG_PPC64
- extern u8 ftr_fixup_test_FW_FTR_macros;
- extern u8 ftr_fixup_test_FW_FTR_macros_expected;
- unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected -
- &ftr_fixup_test_FW_FTR_macros;
+ extern u8 ftr_fixup_test_FW_FTR_macros[];
+ extern u8 ftr_fixup_test_FW_FTR_macros_expected[];
+ unsigned long size = ftr_fixup_test_FW_FTR_macros_expected -
+ ftr_fixup_test_FW_FTR_macros;
/* The fixups have already been done for us during boot */
- check(memcmp(&ftr_fixup_test_FW_FTR_macros,
- &ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
+ check(memcmp(ftr_fixup_test_FW_FTR_macros,
+ ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
#endif
}
static void test_lwsync_macros(void)
{
- extern u8 lwsync_fixup_test;
- extern u8 end_lwsync_fixup_test;
- extern u8 lwsync_fixup_test_expected_LWSYNC;
- extern u8 lwsync_fixup_test_expected_SYNC;
- unsigned long size = &end_lwsync_fixup_test -
- &lwsync_fixup_test;
+ extern u8 lwsync_fixup_test[];
+ extern u8 end_lwsync_fixup_test[];
+ extern u8 lwsync_fixup_test_expected_LWSYNC[];
+ extern u8 lwsync_fixup_test_expected_SYNC[];
+ unsigned long size = end_lwsync_fixup_test -
+ lwsync_fixup_test;
/* The fixups have already been done for us during boot */
if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
- check(memcmp(&lwsync_fixup_test,
- &lwsync_fixup_test_expected_LWSYNC, size) == 0);
+ check(memcmp(lwsync_fixup_test,
+ lwsync_fixup_test_expected_LWSYNC, size) == 0);
} else {
- check(memcmp(&lwsync_fixup_test,
- &lwsync_fixup_test_expected_SYNC, size) == 0);
+ check(memcmp(lwsync_fixup_test,
+ lwsync_fixup_test_expected_SYNC, size) == 0);
}
}
/*
* Top of mmap area (just below the process stack).
*
- * Leave at least a ~128 MB hole on 32bit applications.
- *
- * On 64bit applications we randomise the stack by 1GB so we need to
- * space our mmap start address by a further 1GB, otherwise there is a
- * chance the mmap area will end up closer to the stack than our ulimit
- * requires.
+ * Leave at least a ~128 MB hole.
*/
-#define MIN_GAP32 (128*1024*1024)
-#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
-#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
+#define MIN_GAP (128*1024*1024)
#define MAX_GAP (TASK_SIZE/6*5)
static inline int mmap_is_legacy(void)
return rnd << PAGE_SHIFT;
}
+static inline unsigned long stack_maxrandom_size(void)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+
+ /* 8MB for 32bit, 1GB for 64bit */
+ if (is_32bit_task())
+ return (1<<23);
+ else
+ return (1<<30);
+}
+
static inline unsigned long mmap_base(unsigned long rnd)
{
unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
+
+ /* Values close to RLIM_INFINITY can overflow. */
+ if (gap + pad > gap)
+ gap += pad;
if (gap < MIN_GAP)
gap = MIN_GAP;
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+ mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
}
void machine_shutdown(void)
pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
}
-static void __init setup_vmcoreinfo(void)
-{
- mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
-}
-
#ifdef CONFIG_CRASH_DUMP
/*
#endif
setup_resources();
- setup_vmcoreinfo();
setup_lowcore();
smp_fill_possible_mask();
cpu_detect_mhz_feature();
"i" (__FILE__), \
"i" (__LINE__), "i" (0), \
"i" (sizeof(struct bug_entry))); \
+ unreachable(); \
} while (0)
#define __WARN_FLAGS(flags) \
/* Try to get a semi random initial value. */
get_random_bytes(&canary, sizeof(canary));
canary ^= LINUX_VERSION_CODE;
+ canary &= CANARY_MASK;
current->stack_canary = canary;
__stack_chk_guard = current->stack_canary;
#define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
-static void inline sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
+static inline void sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
/* Purge all ways in a particular block of sets, specified by the base
set number and number of sets. Can handle wrap-around, if that's
extern atomic_t nmi_active;
+void arch_touch_nmi_watchdog(void);
void start_nmi_watchdog(void *unused);
void stop_nmi_watchdog(void *unused);
handle_size = (sizeof(struct mdesc_handle) -
sizeof(struct mdesc_hdr) +
mdesc_size);
- base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_REPEAT);
+ base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!base)
return NULL;
static DEFINE_PER_CPU(long, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
{
if (atomic_read(&nmi_active)) {
int cpu;
per_cpu(nmi_touch, cpu) = 1;
}
}
-
- touch_softlockup_watchdog();
}
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MMIO_FLUSH
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
debug_putstr("done.\nBooting the kernel.\n");
return output;
}
+
+void fortify_panic(const char *name)
+{
+ error("detected buffer overflow");
+}
#ifdef CONFIG_X86_32
-extern unsigned long asmlinkage efi_call_phys(void *, ...);
+extern asmlinkage unsigned long efi_call_phys(void *, ...);
#define arch_efi_call_virt_setup() kernel_fpu_begin()
#define arch_efi_call_virt_teardown() kernel_fpu_end()
#define EFI_LOADER_SIGNATURE "EL64"
-extern u64 asmlinkage efi_call(void *fp, ...);
+extern asmlinkage u64 efi_call(void *fp, ...);
#define efi_call_phys(f, args...) efi_call((f), args)
get_random_bytes(&canary, sizeof(canary));
tsc = rdtsc();
canary += tsc + (tsc << 32UL);
+ canary &= CANARY_MASK;
current->stack_canary = canary;
#ifdef CONFIG_X86_64
}
#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *, const void *, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
#ifdef CONFIG_X86_USE_3DNOW
#include <asm/mmx.h>
#endif
#endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
#define __HAVE_ARCH_MEMMOVE
void *memmove(void *dest, const void *src, size_t n);
+extern int memcmp(const void *, const void *, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
#define memcmp __builtin_memcmp
+#endif
#define __HAVE_ARCH_MEMCHR
extern void *memchr(const void *cs, int c, size_t count);
: __memset_generic((s), (c), (count)))
#define __HAVE_ARCH_MEMSET
+extern void *memset(void *, int, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
#if (__GNUC__ >= 4)
#define memset(s, c, count) __builtin_memset(s, c, count)
#else
(count)) \
: __memset((s), (c), (count)))
#endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
/*
* find the first occurrence of byte 'c', or 1 past the area if none
extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
+#ifndef CONFIG_FORTIFY_SOURCE
#ifndef CONFIG_KMEMCHECK
#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
#define memcpy(dst, src, len) \
*/
#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
#endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
#define __HAVE_ARCH_MEMSET
void *memset(void *s, int c, size_t n);
#define memcpy(dst, src, len) __memcpy(dst, src, len)
#define memmove(dst, src, len) __memmove(dst, src, len)
#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
#endif
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
#include <linux/init.h>
#include <linux/delay.h>
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
return (u64)(cpu_khz) * 1000 * watchdog_thresh;
bufp += sizeof(Elf64_Phdr);
phdr->p_type = PT_NOTE;
phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
- phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+ phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
(ehdr->e_phnum)++;
#ifdef CONFIG_X86_64
__visible void *memcpy(void *to, const void *from, size_t n)
{
-#ifdef CONFIG_X86_USE_3DNOW
+#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
return __memcpy3d(to, from, n);
#else
return __memcpy(to, from, n);
static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
{
unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
unsigned long gap_min, gap_max;
+ /* Values close to RLIM_INFINITY can overflow. */
+ if (gap + pad > gap)
+ gap += pad;
+
/*
* Top of mmap area (just below the process stack).
* Leave an at least ~128 MB hole with possible stack randomization.
*/
- gap_min = SIZE_128M + stack_maxrandom_size(task_size);
+ gap_min = SIZE_128M;
gap_max = (task_size / 6) * 5;
if (gap < gap_min)
phys_addr_t paddr_vmcoreinfo_note(void)
{
if (xen_pv_domain())
- return virt_to_machine(&vmcoreinfo_note).maddr;
+ return virt_to_machine(vmcoreinfo_note).maddr;
else
- return __pa_symbol(&vmcoreinfo_note);
+ return __pa(vmcoreinfo_note);
}
#endif /* CONFIG_KEXEC_CORE */
unsigned long time = random_get_entropy() ^ jiffies;
unsigned long flags;
+ if (!crng_ready()) {
+ crng_fast_load(buf, size);
+ return;
+ }
+
trace_add_device_randomness(size, _RET_IP_);
spin_lock_irqsave(&input_pool.lock, flags);
_mix_pool_bytes(&input_pool, buf, size);
cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt
KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
+ -D__NO_FORTIFY \
$(call cc-option,-ffreestanding) \
$(call cc-option,-fno-stack-protector)
* again with !__GFP_NORETRY. However, we still
* want to fail this allocation rather than
* trigger the out-of-memory killer and for
- * this we want the future __GFP_MAYFAIL.
+ * this we want __GFP_RETRY_MAYFAIL.
*/
+ gfp |= __GFP_RETRY_MAYFAIL;
}
} while (1);
free_rd_atomic_resource(qp, res);
rxe_advance_resp_resource(qp);
- memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+ memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt));
+ memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0,
+ sizeof(skb->cb) - sizeof(ack_pkt));
res->type = RXE_ATOMIC_MASK;
res->atomic.skb = skb;
* order for ISA to be able to DMA to it.
*/
host->dma_buffer = kmalloc(WBSD_DMA_SIZE,
- GFP_NOIO | GFP_DMA | __GFP_REPEAT | __GFP_NOWARN);
+ GFP_NOIO | GFP_DMA | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!host->dma_buffer)
goto free;
}
if (!session->response)
session->response = (char *)__get_free_pages(GFP_KERNEL
- | __GFP_REPEAT | GFP_DMA,
+ | __GFP_RETRY_MAYFAIL | GFP_DMA,
get_order(session->bufsize));
if (!session->response) {
mutex_unlock(&session->mutex);
.ndo_start_xmit = ctcmpc_tx,
};
-void static ctcm_dev_setup(struct net_device *dev)
+static void ctcm_dev_setup(struct net_device *dev)
{
dev->type = ARPHRD_SLIP;
dev->tx_queue_len = 100;
return rc;
}
-int inline qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
+inline int qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
{
int cast_type = RTN_UNSPEC;
struct neighbour *n = NULL;
int rc;
se_sess->sess_cmd_map = kzalloc(tag_num * tag_size,
- GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
if (!se_sess->sess_cmd_map) {
se_sess->sess_cmd_map = vzalloc(tag_num * tag_size);
if (!se_sess->sess_cmd_map) {
* called per port from attach...
* @port: port to initialize
*/
-static int inline port_init(struct ioc3_port *port)
+static inline int port_init(struct ioc3_port *port)
{
uint32_t sio_cr;
struct port_hooks *hooks = port->ip_hooks;
* @pending: interrupts to handle
*/
-static int inline
+static inline int
ioc3uart_intr_one(struct ioc3_submodule *is,
struct ioc3_driver_data *idd,
unsigned int pending)
* called per port from attach...
* @port: port to initialize
*/
-static int inline port_init(struct ioc4_port *port)
+static inline int port_init(struct ioc4_port *port)
{
uint32_t sio_cr;
struct hooks *hooks = port->ip_hooks;
* IOC4 with serial ports in the system.
* @idd: Master module data for this IOC4
*/
-static int inline ioc4_attach_local(struct ioc4_driver_data *idd)
+static inline int ioc4_attach_local(struct ioc4_driver_data *idd)
{
struct ioc4_port *port;
struct ioc4_port *ports[IOC4_NUM_SERIAL_PORTS];
* Perform a memcpy and calculate fcs using ppp 10bit CRC algorithm. Return
* new 10 bit FCS.
*/
-static __u16 __inline__ fcs_compute10(unsigned char *sp, int len, __u16 fcs)
+static inline __u16 fcs_compute10(unsigned char *sp, int len, __u16 fcs)
{
for (; len-- > 0; fcs = CRC10_FCS(fcs, *sp++));
return fcs;
struct sk_buff **queue;
int i;
- n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT);
+ n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!n)
return -ENOMEM;
vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
struct vhost_virtqueue **vqs;
int r = -ENOMEM, i;
- vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
if (!vs) {
vs = vzalloc(sizeof(*vs));
if (!vs)
/* This struct is large and allocation could fail, fall back to vmalloc
* if there is no other way.
*/
- vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_REPEAT);
+ vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!vsock)
return -ENOMEM;
* helper functions *
***************************************************************/
-int __inline__ intelfb_var_to_depth(const struct fb_var_screeninfo *var)
+__inline__ int intelfb_var_to_depth(const struct fb_var_screeninfo *var)
{
DBG_MSG("intelfb_var_to_depth: bpp: %d, green.length is %d\n",
var->bits_per_pixel, var->green.length);
unsigned long vram_size;
} lcdc;
-static void inline enable_irqs(int mask)
+static inline void enable_irqs(int mask)
{
lcdc.irq_mask |= mask;
}
-static void inline disable_irqs(int mask)
+static inline void disable_irqs(int mask)
{
lcdc.irq_mask &= ~mask;
}
}
}
-static void inline setup_regs(void)
+static inline void setup_regs(void)
{
u32 l;
struct lcd_panel *panel = lcdc.fbdev->panel;
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
default y
- select PERCPU_RWSEM
help
This option enables standard file locking support, required
for filesystems like NFS and for the flock() system
if (i_sblock > info->si_blocks ||
i_eblock > info->si_blocks ||
i_sblock > i_eblock ||
- i_eoff > s_size ||
+ (i_eoff != le32_to_cpu(-1) && i_eoff > s_size) ||
i_sblock * BFS_BSIZE > i_eoff) {
printf("Inode 0x%08x corrupted\n", i);
mutex_lock(&ep->mtx);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+ struct inode *inode = file_inode(epi->ffd.file);
- seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+ seq_printf(m, "tfd: %8d events: %8x data: %16llx "
+ " pos:%lli ino:%lx sdev:%x\n",
epi->ffd.fd, epi->event.events,
- (long long)epi->event.data);
+ (long long)epi->event.data,
+ (long long)epi->ffd.file->f_pos,
+ inode->i_ino, inode->i_sb->s_dev);
if (seq_has_overflowed(m))
break;
}
return epir;
}
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
+{
+ struct rb_node *rbp;
+ struct epitem *epi;
+
+ for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ epi = rb_entry(rbp, struct epitem, rbn);
+ if (epi->ffd.fd == tfd) {
+ if (toff == 0)
+ return epi;
+ else
+ toff--;
+ }
+ cond_resched();
+ }
+
+ return NULL;
+}
+
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
+ unsigned long toff)
+{
+ struct file *file_raw;
+ struct eventpoll *ep;
+ struct epitem *epi;
+
+ if (!is_file_epoll(file))
+ return ERR_PTR(-EINVAL);
+
+ ep = file->private_data;
+
+ mutex_lock(&ep->mtx);
+ epi = ep_find_tfd(ep, tfd, toff);
+ if (epi)
+ file_raw = epi->ffd.file;
+ else
+ file_raw = ERR_PTR(-ENOENT);
+ mutex_unlock(&ep->mtx);
+
+ return file_raw;
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
struct page *page = radix_tree_deref_slot_protected(slot,
&mapping->tree_lock);
if (likely(page) && PageDirty(page)) {
- __dec_wb_stat(old_wb, WB_RECLAIMABLE);
- __inc_wb_stat(new_wb, WB_RECLAIMABLE);
+ dec_wb_stat(old_wb, WB_RECLAIMABLE);
+ inc_wb_stat(new_wb, WB_RECLAIMABLE);
}
}
&mapping->tree_lock);
if (likely(page)) {
WARN_ON_ONCE(!PageWriteback(page));
- __dec_wb_stat(old_wb, WB_WRITEBACK);
- __inc_wb_stat(new_wb, WB_WRITEBACK);
+ dec_wb_stat(old_wb, WB_WRITEBACK);
+ inc_wb_stat(new_wb, WB_WRITEBACK);
}
}
.write = proc_fault_inject_write,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ int err, n;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+ put_task_struct(task);
+ if (task != current)
+ return -EPERM;
+ err = kstrtoint_from_user(buf, count, 10, &n);
+ if (err)
+ return err;
+ if (n < 0 || n == INT_MAX)
+ return -EINVAL;
+ current->fail_nth = n + 1;
+ return count;
+}
+
+static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ int err;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+ put_task_struct(task);
+ if (task != current)
+ return -EPERM;
+ if (count < 1)
+ return -EINVAL;
+ err = put_user((char)(current->fail_nth ? 'N' : 'Y'), buf);
+ if (err)
+ return err;
+ current->fail_nth = 0;
+ return 1;
+}
+
+static const struct file_operations proc_fail_nth_operations = {
+ .read = proc_fail_nth_read,
+ .write = proc_fail_nth_write,
+};
#endif
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
+ /*
+ * Operations on the file check that the task is current,
+ * so we create it with 0666 to support testing under unprivileged user.
+ */
+ REG("fail-nth", 0666, proc_fail_nth_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
ONE("io", S_IRUSR, proc_tid_io_accounting),
return -EINVAL;
}
+static int sysctl_check_table_array(const char *path, struct ctl_table *table)
+{
+ int err = 0;
+
+ if ((table->proc_handler == proc_douintvec) ||
+ (table->proc_handler == proc_douintvec_minmax)) {
+ if (table->maxlen != sizeof(unsigned int))
+ err |= sysctl_err(path, table, "array now allowed");
+ }
+
+ return err;
+}
+
static int sysctl_check_table(const char *path, struct ctl_table *table)
{
int err = 0;
for (; table->procname; table++) {
if (table->child)
- err = sysctl_err(path, table, "Not a file");
+ err |= sysctl_err(path, table, "Not a file");
if ((table->proc_handler == proc_dostring) ||
(table->proc_handler == proc_dointvec) ||
(table->proc_handler == proc_douintvec) ||
+ (table->proc_handler == proc_douintvec_minmax) ||
(table->proc_handler == proc_dointvec_minmax) ||
(table->proc_handler == proc_dointvec_jiffies) ||
(table->proc_handler == proc_dointvec_userhz_jiffies) ||
(table->proc_handler == proc_doulongvec_minmax) ||
(table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
if (!table->data)
- err = sysctl_err(path, table, "No data");
+ err |= sysctl_err(path, table, "No data");
if (!table->maxlen)
- err = sysctl_err(path, table, "No maxlen");
+ err |= sysctl_err(path, table, "No maxlen");
+ else
+ err |= sysctl_check_table_array(path, table);
}
if (!table->proc_handler)
- err = sysctl_err(path, table, "No proc_handler");
+ err |= sysctl_err(path, table, "No proc_handler");
if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
- err = sysctl_err(path, table, "bogus .mode 0%o",
+ err |= sysctl_err(path, table, "bogus .mode 0%o",
table->mode);
}
return err;
lflags &= ~__GFP_FS;
}
+ /*
+ * Default page/slab allocator behavior is to retry for ever
+ * for small allocations. We can override this behavior by using
+ * __GFP_RETRY_MAYFAIL which will tell the allocator to retry as long
+ * as it is feasible but rather fail than retry forever for all
+ * request sizes.
+ */
+ if (flags & KM_MAYFAIL)
+ lflags |= __GFP_RETRY_MAYFAIL;
+
if (flags & KM_ZERO)
lflags |= __GFP_ZERO;
percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
}
-static inline void __inc_wb_stat(struct bdi_writeback *wb,
- enum wb_stat_item item)
-{
- __add_wb_stat(wb, item, 1);
-}
-
static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- unsigned long flags;
-
- local_irq_save(flags);
- __inc_wb_stat(wb, item);
- local_irq_restore(flags);
-}
-
-static inline void __dec_wb_stat(struct bdi_writeback *wb,
- enum wb_stat_item item)
-{
- __add_wb_stat(wb, item, -1);
+ __add_wb_stat(wb, item, 1);
}
static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- unsigned long flags;
-
- local_irq_save(flags);
- __dec_wb_stat(wb, item);
- local_irq_restore(flags);
+ __add_wb_stat(wb, item, -1);
}
static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
CRASH_CORE_NOTE_NAME_BYTES + \
CRASH_CORE_NOTE_DESC_BYTES)
-#define VMCOREINFO_BYTES (4096)
+#define VMCOREINFO_BYTES PAGE_SIZE
#define VMCOREINFO_NOTE_NAME "VMCOREINFO"
#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \
typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
+void crash_update_vmcoreinfo_safecopy(void *ptr);
void crash_save_vmcoreinfo(void);
void arch_crash_save_vmcoreinfo(void);
__printf(1, 2)
#define VMCOREINFO_CONFIG(name) \
vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
-extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-extern size_t vmcoreinfo_size;
-extern size_t vmcoreinfo_max_size;
+extern u32 *vmcoreinfo_note;
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len);
}
struct name_snapshot {
- const char *name;
- char inline_name[DNAME_INLINE_LEN];
+ const unsigned char *name;
+ unsigned char inline_name[DNAME_INLINE_LEN];
};
void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
void release_dentry_name_snapshot(struct name_snapshot *);
#define _LINUX_EVENTPOLL_H
#include <uapi/linux/eventpoll.h>
+#include <uapi/linux/kcmp.h>
/* Forward declarations to avoid compiler errors */
#ifdef CONFIG_EPOLL
+#ifdef CONFIG_CHECKPOINT_RESTORE
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
+#endif
+
/* Used to initialize the epoll bits inside the "struct file" */
static inline void eventpoll_init_file(struct file *file)
{
#define ___GFP_FS 0x80u
#define ___GFP_COLD 0x100u
#define ___GFP_NOWARN 0x200u
-#define ___GFP_REPEAT 0x400u
+#define ___GFP_RETRY_MAYFAIL 0x400u
#define ___GFP_NOFAIL 0x800u
#define ___GFP_NORETRY 0x1000u
#define ___GFP_MEMALLOC 0x2000u
*
* __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
*
- * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
- * _might_ fail. This depends upon the particular VM implementation.
+ * The default allocator behavior depends on the request size. We have a concept
+ * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER).
+ * !costly allocations are too essential to fail so they are implicitly
+ * non-failing by default (with some exceptions like OOM victims might fail so
+ * the caller still has to check for failures) while costly requests try to be
+ * not disruptive and back off even without invoking the OOM killer.
+ * The following three modifiers might be used to override some of these
+ * implicit rules
+ *
+ * __GFP_NORETRY: The VM implementation will try only very lightweight
+ * memory direct reclaim to get some memory under memory pressure (thus
+ * it can sleep). It will avoid disruptive actions like OOM killer. The
+ * caller must handle the failure which is quite likely to happen under
+ * heavy memory pressure. The flag is suitable when failure can easily be
+ * handled at small cost, such as reduced throughput
+ *
+ * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ * procedures that have previously failed if there is some indication
+ * that progress has been made else where. It can wait for other
+ * tasks to attempt high level approaches to freeing memory such as
+ * compaction (which removes fragmentation) and page-out.
+ * There is still a definite limit to the number of retries, but it is
+ * a larger limit than with __GFP_NORETRY.
+ * Allocations with this flag may fail, but only when there is
+ * genuinely little unused memory. While these allocations do not
+ * directly trigger the OOM killer, their failure indicates that
+ * the system is likely to need to use the OOM killer soon. The
+ * caller must handle failure, but can reasonably do so by failing
+ * a higher-level request, or completing it only in a much less
+ * efficient manner.
+ * If the allocation does fail, and the caller is in a position to
+ * free some non-essential memory, doing so could benefit the system
+ * as a whole.
*
* __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- * cannot handle allocation failures. New users should be evaluated carefully
- * (and the flag should be used only when there is no reasonable failure
- * policy) but it is definitely preferable to use the flag rather than
- * opencode endless loop around allocator.
- *
- * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
- * return NULL when direct reclaim and memory compaction have failed to allow
- * the allocation to succeed. The OOM killer is not called with the current
- * implementation.
+ * cannot handle allocation failures. The allocation could block
+ * indefinitely but will never return with failure. Testing for
+ * failure is pointless.
+ * New users should be evaluated carefully (and the flag should be
+ * used only when there is no reasonable failure policy) but it is
+ * definitely preferable to use the flag rather than opencode endless
+ * loop around allocator.
+ * Using this flag for costly allocations is _highly_ discouraged.
*/
#define __GFP_IO ((__force gfp_t)___GFP_IO)
#define __GFP_FS ((__force gfp_t)___GFP_FS)
#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
-#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT)
+#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL)
#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL)
#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY)
umode_t mode;
unsigned long seq;
void *security;
+
+ struct rcu_head rcu;
+ atomic_t refcount;
} ____cacheline_aligned_in_smp;
#endif /* _LINUX_IPC_H */
#include <linux/log2.h>
#include <linux/typecheck.h>
#include <linux/printk.h>
+#include <linux/build_bug.h>
#include <asm/byteorder.h>
#include <uapi/linux/kernel.h>
* @member: the name of the member within the struct.
*
*/
-#define container_of(ptr, type, member) ({ \
- const typeof( ((type *)0)->member ) *__mptr = (ptr); \
- (type *)( (char *)__mptr - offsetof(type,member) );})
+#define container_of(ptr, type, member) ({ \
+ void *__mptr = (void *)(ptr); \
+ BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
+ !__same_type(*(ptr), void), \
+ "pointer type mismatch in container_of()"); \
+ ((type *)(__mptr - offsetof(type, member))); })
/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
unsigned long start;
struct page *control_code_page;
struct page *swap_page;
+ void *vmcoreinfo_data_copy; /* locates in the crash memory */
unsigned long nr_segments;
struct kexec_segment segment[KEXEC_SEGMENT_MAX];
int kexec_should_crash(struct task_struct *);
int kexec_crash_loaded(void);
void crash_save_cpu(struct pt_regs *regs, int cpu);
+extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
extern struct kimage *kexec_image;
extern struct kimage *kexec_crash_image;
static inline struct page *new_page_nodemask(struct page *page,
int preferred_nid, nodemask_t *nodemask)
{
- gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
+ gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
if (PageHuge(page))
return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
#include <linux/sched.h>
#include <asm/irq.h>
+#if defined(CONFIG_HAVE_NMI_WATCHDOG)
+#include <asm/nmi.h>
+#endif
#ifdef CONFIG_LOCKUP_DETECTOR
+void lockup_detector_init(void);
+#else
+static inline void lockup_detector_init(void)
+{
+}
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
extern void touch_softlockup_watchdog_sched(void);
extern void touch_softlockup_watchdog(void);
extern void touch_softlockup_watchdog_sync(void);
extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
-extern unsigned int hardlockup_panic;
-void lockup_detector_init(void);
+extern int soft_watchdog_enabled;
+extern atomic_t watchdog_park_in_progress;
#else
static inline void touch_softlockup_watchdog_sched(void)
{
static inline void touch_all_softlockup_watchdogs(void)
{
}
-static inline void lockup_detector_init(void)
-{
-}
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
+#if defined(CONFIG_HARDLOCKUP_DETECTOR)
+extern void hardlockup_detector_disable(void);
+extern unsigned int hardlockup_panic;
+#else
+static inline void hardlockup_detector_disable(void) {}
+#endif
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
+extern void arch_touch_nmi_watchdog(void);
+#else
+#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+#endif
+
/**
* touch_nmi_watchdog - restart NMI watchdog timeout.
*
* may be used to reset the timeout - for code which intentionally
* disables interrupts for a long time. This call is stateless.
*/
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-#include <asm/nmi.h>
-extern void touch_nmi_watchdog(void);
-#else
static inline void touch_nmi_watchdog(void)
{
+ arch_touch_nmi_watchdog();
touch_softlockup_watchdog();
}
-#endif
-
-#if defined(CONFIG_HARDLOCKUP_DETECTOR)
-extern void hardlockup_detector_disable(void);
-#else
-static inline void hardlockup_detector_disable(void) {}
-#endif
/*
* Create trigger_all_cpu_backtrace() out of the arch-provided
}
#endif
-#ifdef CONFIG_LOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
u64 hw_nmi_get_sample_period(int watchdog_thresh);
+#endif
+
+#ifdef CONFIG_LOCKUP_DETECTOR
extern int nmi_watchdog_enabled;
-extern int soft_watchdog_enabled;
extern int watchdog_user_enabled;
extern int watchdog_thresh;
extern unsigned long watchdog_enabled;
+extern struct cpumask watchdog_cpumask;
extern unsigned long *watchdog_cpumask_bits;
-extern atomic_t watchdog_park_in_progress;
+extern int __read_mostly watchdog_suspended;
#ifdef CONFIG_SMP
extern int sysctl_softlockup_all_cpu_backtrace;
extern int sysctl_hardlockup_all_cpu_backtrace;
void (*report_oc)(struct s3c2410_hcd_info *, int ports);
};
-static void inline s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
+static inline void s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
{
if (info->report_oc != NULL) {
(info->report_oc)(info, ports);
#endif
}
+/*
+ * On 64-bit architectures, protect against non-terminated C string overflows
+ * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
+ */
+#ifdef CONFIG_64BIT
+# ifdef __LITTLE_ENDIAN
+# define CANARY_MASK 0xffffffffffffff00UL
+# else /* big endian, 64 bits: */
+# define CANARY_MASK 0x00ffffffffffffffUL
+# endif
+#else /* 32 bits: */
+# define CANARY_MASK 0xffffffffUL
+#endif
+
+static inline unsigned long get_random_canary(void)
+{
+ unsigned long val = get_random_long();
+
+ return val & CANARY_MASK;
+}
+
unsigned long randomize_page(unsigned long start, unsigned long range);
u32 prandom_u32(void);
#ifdef CONFIG_FAULT_INJECTION
int make_it_fail;
+ int fail_nth;
#endif
/*
* When (nr_dirtied >= nr_dirtied_pause), it's time to call
struct task_struct;
+/* One semaphore structure for each semaphore in the system. */
+struct sem {
+ int semval; /* current value */
+ /*
+ * PID of the process that last modified the semaphore. For
+ * Linux, specifically these are:
+ * - semop
+ * - semctl, via SETVAL and SETALL.
+ * - at task exit when performing undo adjustments (see exit_sem).
+ */
+ int sempid;
+ spinlock_t lock; /* spinlock for fine-grained semtimedop */
+ struct list_head pending_alter; /* pending single-sop operations */
+ /* that alter the semaphore */
+ struct list_head pending_const; /* pending single-sop operations */
+ /* that do not alter the semaphore*/
+ time_t sem_otime; /* candidate for sem_otime */
+} ____cacheline_aligned_in_smp;
+
/* One sem_array data structure for each set of semaphores in the system. */
struct sem_array {
struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */
- time_t sem_ctime; /* last change time */
- struct sem *sem_base; /* ptr to first semaphore in array */
+ time_t sem_ctime; /* create/last semctl() time */
struct list_head pending_alter; /* pending operations */
/* that alter the array */
struct list_head pending_const; /* pending complex operations */
int sem_nsems; /* no. of semaphores in array */
int complex_count; /* pending complex operations */
unsigned int use_global_lock;/* >0: global lock required */
+
+ struct sem sems[];
};
#ifdef CONFIG_SYSVIPC
*
* %__GFP_NOWARN - If allocation fails, don't issue any warnings.
*
- * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
+ * %__GFP_RETRY_MAYFAIL - Try really hard to succeed the allocation but fail
+ * eventually.
*
* There are other flags available as well, but these are not intended
* for general use, and so are not documented here. For a full list of
return tail ? tail + 1 : path;
}
+#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
+#define __RENAME(x) __asm__(#x)
+
+void fortify_panic(const char *name) __noreturn __cold;
+void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
+void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
+void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
+
+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __builtin_strcpy(p, q);
+ if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0)
+ fortify_panic(__func__);
+ return p;
+}
+
+__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __write_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __builtin_strncpy(p, q, size);
+}
+
+__FORTIFY_INLINE char *strcat(char *p, const char *q)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (p_size == (size_t)-1)
+ return __builtin_strcat(p, q);
+ if (strlcat(p, q, p_size) >= p_size)
+ fortify_panic(__func__);
+ return p;
+}
+
+__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
+{
+ __kernel_size_t ret;
+ size_t p_size = __builtin_object_size(p, 0);
+ if (p_size == (size_t)-1)
+ return __builtin_strlen(p);
+ ret = strnlen(p, p_size);
+ if (p_size <= ret)
+ fortify_panic(__func__);
+ return ret;
+}
+
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+ if (p_size <= ret && maxlen != ret)
+ fortify_panic(__func__);
+ return ret;
+}
+
+/* defined after fortified strlen to reuse it */
+extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
+{
+ size_t ret;
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __real_strlcpy(p, q, size);
+ ret = strlen(q);
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ if (__builtin_constant_p(len) && len >= p_size)
+ __write_overflow();
+ if (len >= p_size)
+ fortify_panic(__func__);
+ __builtin_memcpy(p, q, len);
+ p[len] = '\0';
+ }
+ return ret;
+}
+
+/* defined after fortified strlen and strnlen to reuse them */
+__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
+{
+ size_t p_len, copy_len;
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __builtin_strncat(p, q, count);
+ p_len = strlen(p);
+ copy_len = strnlen(q, count);
+ if (p_size < p_len + copy_len + 1)
+ fortify_panic(__func__);
+ __builtin_memcpy(p + p_len, q, copy_len);
+ p[p_len + copy_len] = '\0';
+ return p;
+}
+
+__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __write_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __builtin_memset(p, c, size);
+}
+
+__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (__builtin_constant_p(size)) {
+ if (p_size < size)
+ __write_overflow();
+ if (q_size < size)
+ __read_overflow2();
+ }
+ if (p_size < size || q_size < size)
+ fortify_panic(__func__);
+ return __builtin_memcpy(p, q, size);
+}
+
+__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (__builtin_constant_p(size)) {
+ if (p_size < size)
+ __write_overflow();
+ if (q_size < size)
+ __read_overflow2();
+ }
+ if (p_size < size || q_size < size)
+ fortify_panic(__func__);
+ return __builtin_memmove(p, q, size);
+}
+
+extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
+__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __real_memscan(p, c, size);
+}
+
+__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+ if (__builtin_constant_p(size)) {
+ if (p_size < size)
+ __read_overflow();
+ if (q_size < size)
+ __read_overflow2();
+ }
+ if (p_size < size || q_size < size)
+ fortify_panic(__func__);
+ return __builtin_memcmp(p, q, size);
+}
+
+__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __builtin_memchr(p, c, size);
+}
+
+void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
+__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __real_memchr_inv(p, c, size);
+}
+
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __real_kmemdup(p, size, gfp);
+}
+#endif
+
#endif /* _LINUX_STRING_H_ */
void __user *, size_t *, loff_t *);
extern int proc_dointvec_minmax(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
+extern int proc_douintvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
extern int proc_dointvec_jiffies(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
{(unsigned long)__GFP_FS, "__GFP_FS"}, \
{(unsigned long)__GFP_COLD, "__GFP_COLD"}, \
{(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \
- {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \
+ {(unsigned long)__GFP_RETRY_MAYFAIL, "__GFP_RETRY_MAYFAIL"}, \
{(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \
{(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \
{(unsigned long)__GFP_COMP, "__GFP_COMP"}, \
#ifndef _UAPI_LINUX_KCMP_H
#define _UAPI_LINUX_KCMP_H
+#include <linux/types.h>
+
/* Comparison type */
enum kcmp_type {
KCMP_FILE,
KCMP_SIGHAND,
KCMP_IO,
KCMP_SYSVSEM,
+ KCMP_EPOLL_TFD,
KCMP_TYPES,
};
+/* Slot for KCMP_EPOLL_TFD */
+struct kcmp_epoll_slot {
+ __u32 efd; /* epoll file descriptor */
+ __u32 tfd; /* target file number */
+ __u32 toff; /* target offset within same numbered sequence */
+};
+
#endif /* _UAPI_LINUX_KCMP_H */
struct semid_ds {
struct ipc_perm sem_perm; /* permissions .. see ipc.h */
__kernel_time_t sem_otime; /* last semop time */
- __kernel_time_t sem_ctime; /* last change time */
+ __kernel_time_t sem_ctime; /* create/last semctl() time */
struct sem *sem_base; /* ptr to first semaphore in array */
struct sem_queue *sem_pending; /* pending operations to be processed */
struct sem_queue **sem_pending_last; /* last pending operation */
/*
* Set up the initial canary ASAP:
*/
+ add_latent_entropy();
boot_init_stack_canary();
cgroup_init_early();
static void msg_rcu_free(struct rcu_head *head)
{
- struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
- struct msg_queue *msq = ipc_rcu_to_struct(p);
+ struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+ struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
security_msg_queue_free(msq);
- ipc_rcu_free(head);
+ kvfree(msq);
}
/**
static int newque(struct ipc_namespace *ns, struct ipc_params *params)
{
struct msg_queue *msq;
- int id, retval;
+ int retval;
key_t key = params->key;
int msgflg = params->flg;
- msq = ipc_rcu_alloc(sizeof(*msq));
- if (!msq)
+ msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
+ if (unlikely(!msq))
return -ENOMEM;
msq->q_perm.mode = msgflg & S_IRWXUGO;
msq->q_perm.security = NULL;
retval = security_msg_queue_alloc(msq);
if (retval) {
- ipc_rcu_putref(msq, ipc_rcu_free);
+ kvfree(msq);
return retval;
}
INIT_LIST_HEAD(&msq->q_senders);
/* ipc_addid() locks msq upon success. */
- id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
- if (id < 0) {
- ipc_rcu_putref(msq, msg_rcu_free);
- return id;
+ retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
+ if (retval < 0) {
+ call_rcu(&msq->q_perm.rcu, msg_rcu_free);
+ return retval;
}
ipc_unlock_object(&msq->q_perm);
free_msg(msg);
}
atomic_sub(msq->q_cbytes, &ns->msg_bytes);
- ipc_rcu_putref(msq, msg_rcu_free);
+ ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
}
/*
/* enqueue the sender and prepare to block */
ss_add(msq, &s, msgsz);
- if (!ipc_rcu_getref(msq)) {
+ if (!ipc_rcu_getref(&msq->q_perm)) {
err = -EIDRM;
goto out_unlock0;
}
rcu_read_lock();
ipc_lock_object(&msq->q_perm);
- ipc_rcu_putref(msq, msg_rcu_free);
+ ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
/* raced with RMID? */
if (!ipc_valid_object(&msq->q_perm)) {
err = -EIDRM;
#include <linux/uaccess.h>
#include "util.h"
-/* One semaphore structure for each semaphore in the system. */
-struct sem {
- int semval; /* current value */
- /*
- * PID of the process that last modified the semaphore. For
- * Linux, specifically these are:
- * - semop
- * - semctl, via SETVAL and SETALL.
- * - at task exit when performing undo adjustments (see exit_sem).
- */
- int sempid;
- spinlock_t lock; /* spinlock for fine-grained semtimedop */
- struct list_head pending_alter; /* pending single-sop operations */
- /* that alter the semaphore */
- struct list_head pending_const; /* pending single-sop operations */
- /* that do not alter the semaphore*/
- time_t sem_otime; /* candidate for sem_otime */
-} ____cacheline_aligned_in_smp;
/* One queue for each sleeping process in the system. */
struct sem_queue {
* sem_array.sem_undo
*
* b) global or semaphore sem_lock() for read/write:
- * sem_array.sem_base[i].pending_{const,alter}:
+ * sem_array.sems[i].pending_{const,alter}:
*
* c) special:
* sem_undo_list.list_proc:
*/
list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
struct sem *curr;
- curr = &sma->sem_base[q->sops[0].sem_num];
+ curr = &sma->sems[q->sops[0].sem_num];
list_add_tail(&q->list, &curr->pending_alter);
}
{
int i;
for (i = 0; i < sma->sem_nsems; i++) {
- struct sem *sem = sma->sem_base + i;
+ struct sem *sem = &sma->sems[i];
list_splice_init(&sem->pending_alter, &sma->pending_alter);
}
static void sem_rcu_free(struct rcu_head *head)
{
- struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
- struct sem_array *sma = ipc_rcu_to_struct(p);
+ struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+ struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
security_sem_free(sma);
- ipc_rcu_free(head);
+ kvfree(sma);
}
/*
sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
for (i = 0; i < sma->sem_nsems; i++) {
- sem = sma->sem_base + i;
+ sem = &sma->sems[i];
spin_lock(&sem->lock);
spin_unlock(&sem->lock);
}
*
* Both facts are tracked by use_global_mode.
*/
- sem = sma->sem_base + sops->sem_num;
+ sem = &sma->sems[sops->sem_num];
/*
* Initial check for use_global_lock. Just an optimization,
complexmode_tryleave(sma);
ipc_unlock_object(&sma->sem_perm);
} else {
- struct sem *sem = sma->sem_base + locknum;
+ struct sem *sem = &sma->sems[locknum];
spin_unlock(&sem->lock);
}
}
static inline void sem_lock_and_putref(struct sem_array *sma)
{
sem_lock(sma, NULL, -1);
- ipc_rcu_putref(sma, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}
static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
ipc_rmid(&sem_ids(ns), &s->sem_perm);
}
+static struct sem_array *sem_alloc(size_t nsems)
+{
+ struct sem_array *sma;
+ size_t size;
+
+ if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
+ return NULL;
+
+ size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
+ sma = kvmalloc(size, GFP_KERNEL);
+ if (unlikely(!sma))
+ return NULL;
+
+ memset(sma, 0, size);
+
+ return sma;
+}
+
/**
* newary - Create a new semaphore set
* @ns: namespace
*/
static int newary(struct ipc_namespace *ns, struct ipc_params *params)
{
- int id;
int retval;
struct sem_array *sma;
- int size;
key_t key = params->key;
int nsems = params->u.nsems;
int semflg = params->flg;
if (ns->used_sems + nsems > ns->sc_semmns)
return -ENOSPC;
- size = sizeof(*sma) + nsems * sizeof(struct sem);
- sma = ipc_rcu_alloc(size);
+ sma = sem_alloc(nsems);
if (!sma)
return -ENOMEM;
- memset(sma, 0, size);
-
sma->sem_perm.mode = (semflg & S_IRWXUGO);
sma->sem_perm.key = key;
sma->sem_perm.security = NULL;
retval = security_sem_alloc(sma);
if (retval) {
- ipc_rcu_putref(sma, ipc_rcu_free);
+ kvfree(sma);
return retval;
}
- sma->sem_base = (struct sem *) &sma[1];
-
for (i = 0; i < nsems; i++) {
- INIT_LIST_HEAD(&sma->sem_base[i].pending_alter);
- INIT_LIST_HEAD(&sma->sem_base[i].pending_const);
- spin_lock_init(&sma->sem_base[i].lock);
+ INIT_LIST_HEAD(&sma->sems[i].pending_alter);
+ INIT_LIST_HEAD(&sma->sems[i].pending_const);
+ spin_lock_init(&sma->sems[i].lock);
}
sma->complex_count = 0;
sma->sem_nsems = nsems;
sma->sem_ctime = get_seconds();
- id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
- if (id < 0) {
- ipc_rcu_putref(sma, sem_rcu_free);
- return id;
+ retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
+ if (retval < 0) {
+ call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
+ return retval;
}
ns->used_sems += nsems;
un = q->undo;
for (sop = sops; sop < sops + nsops; sop++) {
- curr = sma->sem_base + sop->sem_num;
+ curr = &sma->sems[sop->sem_num];
sem_op = sop->sem_op;
result = curr->semval;
sop--;
pid = q->pid;
while (sop >= sops) {
- sma->sem_base[sop->sem_num].sempid = pid;
+ sma->sems[sop->sem_num].sempid = pid;
sop--;
}
sop--;
while (sop >= sops) {
sem_op = sop->sem_op;
- sma->sem_base[sop->sem_num].semval -= sem_op;
+ sma->sems[sop->sem_num].semval -= sem_op;
if (sop->sem_flg & SEM_UNDO)
un->semadj[sop->sem_num] += sem_op;
sop--;
* until the operations can go through.
*/
for (sop = sops; sop < sops + nsops; sop++) {
- curr = sma->sem_base + sop->sem_num;
+ curr = &sma->sems[sop->sem_num];
sem_op = sop->sem_op;
result = curr->semval;
}
for (sop = sops; sop < sops + nsops; sop++) {
- curr = sma->sem_base + sop->sem_num;
+ curr = &sma->sems[sop->sem_num];
sem_op = sop->sem_op;
result = curr->semval;
if (semnum == -1)
pending_list = &sma->pending_const;
else
- pending_list = &sma->sem_base[semnum].pending_const;
+ pending_list = &sma->sems[semnum].pending_const;
list_for_each_entry_safe(q, tmp, pending_list, list) {
int error = perform_atomic_semop(sma, q);
for (i = 0; i < nsops; i++) {
int num = sops[i].sem_num;
- if (sma->sem_base[num].semval == 0) {
+ if (sma->sems[num].semval == 0) {
got_zero = 1;
semop_completed |= wake_const_ops(sma, num, wake_q);
}
* Assume all were changed.
*/
for (i = 0; i < sma->sem_nsems; i++) {
- if (sma->sem_base[i].semval == 0) {
+ if (sma->sems[i].semval == 0) {
got_zero = 1;
semop_completed |= wake_const_ops(sma, i, wake_q);
}
if (semnum == -1)
pending_list = &sma->pending_alter;
else
- pending_list = &sma->sem_base[semnum].pending_alter;
+ pending_list = &sma->sems[semnum].pending_alter;
again:
list_for_each_entry_safe(q, tmp, pending_list, list) {
* be in the per semaphore pending queue, and decrements
* cannot be successful if the value is already 0.
*/
- if (semnum != -1 && sma->sem_base[semnum].semval == 0)
+ if (semnum != -1 && sma->sems[semnum].semval == 0)
break;
error = perform_atomic_semop(sma, q);
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
if (sops == NULL) {
- sma->sem_base[0].sem_otime = get_seconds();
+ sma->sems[0].sem_otime = get_seconds();
} else {
- sma->sem_base[sops[0].sem_num].sem_otime =
+ sma->sems[sops[0].sem_num].sem_otime =
get_seconds();
}
}
semcnt = 0;
/* First: check the simple operations. They are easy to evaluate */
if (count_zero)
- l = &sma->sem_base[semnum].pending_const;
+ l = &sma->sems[semnum].pending_const;
else
- l = &sma->sem_base[semnum].pending_alter;
+ l = &sma->sems[semnum].pending_alter;
list_for_each_entry(q, l, list) {
/* all task on a per-semaphore list sleep on exactly
wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
}
for (i = 0; i < sma->sem_nsems; i++) {
- struct sem *sem = sma->sem_base + i;
+ struct sem *sem = &sma->sems[i];
list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
unlink_queue(sma, q);
wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
wake_up_q(&wake_q);
ns->used_sems -= sma->sem_nsems;
- ipc_rcu_putref(sma, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
}
static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
int i;
time_t res;
- res = sma->sem_base[0].sem_otime;
+ res = sma->sems[0].sem_otime;
for (i = 1; i < sma->sem_nsems; i++) {
- time_t to = sma->sem_base[i].sem_otime;
+ time_t to = sma->sems[i].sem_otime;
if (to > res)
res = to;
return -EIDRM;
}
- curr = &sma->sem_base[semnum];
+ curr = &sma->sems[semnum];
ipc_assert_locked_object(&sma->sem_perm);
list_for_each_entry(un, &sma->list_id, list_id)
goto out_unlock;
}
if (nsems > SEMMSL_FAST) {
- if (!ipc_rcu_getref(sma)) {
+ if (!ipc_rcu_getref(&sma->sem_perm)) {
err = -EIDRM;
goto out_unlock;
}
sem_unlock(sma, -1);
rcu_read_unlock();
- sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ sem_io = kvmalloc_array(nsems, sizeof(ushort),
+ GFP_KERNEL);
if (sem_io == NULL) {
- ipc_rcu_putref(sma, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
return -ENOMEM;
}
}
}
for (i = 0; i < sma->sem_nsems; i++)
- sem_io[i] = sma->sem_base[i].semval;
+ sem_io[i] = sma->sems[i].semval;
sem_unlock(sma, -1);
rcu_read_unlock();
err = 0;
int i;
struct sem_undo *un;
- if (!ipc_rcu_getref(sma)) {
+ if (!ipc_rcu_getref(&sma->sem_perm)) {
err = -EIDRM;
goto out_rcu_wakeup;
}
rcu_read_unlock();
if (nsems > SEMMSL_FAST) {
- sem_io = ipc_alloc(sizeof(ushort)*nsems);
+ sem_io = kvmalloc_array(nsems, sizeof(ushort),
+ GFP_KERNEL);
if (sem_io == NULL) {
- ipc_rcu_putref(sma, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
return -ENOMEM;
}
}
if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
- ipc_rcu_putref(sma, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
err = -EFAULT;
goto out_free;
}
for (i = 0; i < nsems; i++) {
if (sem_io[i] > SEMVMX) {
- ipc_rcu_putref(sma, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
err = -ERANGE;
goto out_free;
}
}
for (i = 0; i < nsems; i++) {
- sma->sem_base[i].semval = sem_io[i];
- sma->sem_base[i].sempid = task_tgid_vnr(current);
+ sma->sems[i].semval = sem_io[i];
+ sma->sems[i].sempid = task_tgid_vnr(current);
}
ipc_assert_locked_object(&sma->sem_perm);
err = -EIDRM;
goto out_unlock;
}
- curr = &sma->sem_base[semnum];
+ curr = &sma->sems[semnum];
switch (cmd) {
case GETVAL:
wake_up_q(&wake_q);
out_free:
if (sem_io != fast_sem_io)
- ipc_free(sem_io);
+ kvfree(sem_io);
return err;
}
}
nsems = sma->sem_nsems;
- if (!ipc_rcu_getref(sma)) {
+ if (!ipc_rcu_getref(&sma->sem_perm)) {
rcu_read_unlock();
un = ERR_PTR(-EIDRM);
goto out;
/* step 2: allocate new undo structure */
new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
if (!new) {
- ipc_rcu_putref(sma, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
return ERR_PTR(-ENOMEM);
}
*/
if (nsops == 1) {
struct sem *curr;
- curr = &sma->sem_base[sops->sem_num];
+ curr = &sma->sems[sops->sem_num];
if (alter) {
if (sma->complex_count) {
/* perform adjustments registered in un */
for (i = 0; i < sma->sem_nsems; i++) {
- struct sem *semaphore = &sma->sem_base[i];
+ struct sem *semaphore = &sma->sems[i];
if (un->semadj[i]) {
semaphore->semval += un->semadj[i];
/*
static void shm_rcu_free(struct rcu_head *head)
{
- struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
- struct shmid_kernel *shp = ipc_rcu_to_struct(p);
-
+ struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
+ rcu);
+ struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
+ shm_perm);
security_shm_free(shp);
- ipc_rcu_free(head);
+ kvfree(shp);
}
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
user_shm_unlock(i_size_read(file_inode(shm_file)),
shp->mlock_user);
fput(shm_file);
- ipc_rcu_putref(shp, shm_rcu_free);
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
}
/*
size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
struct file *file;
char name[13];
- int id;
vm_flags_t acctflag = 0;
if (size < SHMMIN || size > ns->shm_ctlmax)
ns->shm_tot + numpages > ns->shm_ctlall)
return -ENOSPC;
- shp = ipc_rcu_alloc(sizeof(*shp));
- if (!shp)
+ shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
+ if (unlikely(!shp))
return -ENOMEM;
shp->shm_perm.key = key;
shp->shm_perm.security = NULL;
error = security_shm_alloc(shp);
if (error) {
- ipc_rcu_putref(shp, ipc_rcu_free);
+ kvfree(shp);
return error;
}
shp->shm_file = file;
shp->shm_creator = current;
- id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
- if (id < 0) {
- error = id;
+ error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
+ if (error < 0)
goto no_id;
- }
list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist);
user_shm_unlock(size, shp->mlock_user);
fput(file);
no_file:
- ipc_rcu_putref(shp, shm_rcu_free);
+ call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
return error;
}
idr_preload(GFP_KERNEL);
+ atomic_set(&new->refcount, 1);
spin_lock_init(&new->lock);
new->deleted = false;
rcu_read_lock();
ipcp->deleted = true;
}
-/**
- * ipc_alloc - allocate ipc space
- * @size: size desired
- *
- * Allocate memory from the appropriate pools and return a pointer to it.
- * NULL is returned if the allocation fails
- */
-void *ipc_alloc(int size)
-{
- return kvmalloc(size, GFP_KERNEL);
-}
-
-/**
- * ipc_free - free ipc space
- * @ptr: pointer returned by ipc_alloc
- *
- * Free a block created with ipc_alloc().
- */
-void ipc_free(void *ptr)
+int ipc_rcu_getref(struct kern_ipc_perm *ptr)
{
- kvfree(ptr);
+ return atomic_inc_not_zero(&ptr->refcount);
}
-/**
- * ipc_rcu_alloc - allocate ipc and rcu space
- * @size: size desired
- *
- * Allocate memory for the rcu header structure + the object.
- * Returns the pointer to the object or NULL upon failure.
- */
-void *ipc_rcu_alloc(int size)
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+ void (*func)(struct rcu_head *head))
{
- /*
- * We prepend the allocation with the rcu struct
- */
- struct ipc_rcu *out = ipc_alloc(sizeof(struct ipc_rcu) + size);
- if (unlikely(!out))
- return NULL;
- atomic_set(&out->refcount, 1);
- return out + 1;
-}
-
-int ipc_rcu_getref(void *ptr)
-{
- struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
- return atomic_inc_not_zero(&p->refcount);
-}
-
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
-{
- struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
- if (!atomic_dec_and_test(&p->refcount))
+ if (!atomic_dec_and_test(&ptr->refcount))
return;
- call_rcu(&p->rcu, func);
-}
-
-void ipc_rcu_free(struct rcu_head *head)
-{
- struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-
- kvfree(p);
+ call_rcu(&ptr->rcu, func);
}
/**
static inline void shm_exit_ns(struct ipc_namespace *ns) { }
#endif
-struct ipc_rcu {
- struct rcu_head rcu;
- atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
-#define ipc_rcu_to_struct(p) ((void *)(p+1))
-
/*
* Structure that holds the parameters needed by the ipc operations
* (see after)
/* must be called with ipcp locked */
int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
-/* for rare, potentially huge allocations.
- * both function can sleep
- */
-void *ipc_alloc(int size);
-void ipc_free(void *ptr);
-
/*
* For allocation that need to be freed by RCU.
* Objects are reference counted, they start with reference count 1.
* getref increases the refcount, the putref call that reduces the recount
* to 0 schedules the rcu destruction. Caller must guarantee locking.
+ *
+ * refcount is initialized by ipc_addid(), before that point call_rcu()
+ * must be used.
*/
-void *ipc_rcu_alloc(int size);
-int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
-void ipc_rcu_free(struct rcu_head *head);
+int ipc_rcu_getref(struct kern_ipc_perm *ptr);
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+ void (*func)(struct rcu_head *head));
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
#include <asm/sections.h>
/* vmcoreinfo stuff */
-static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+static unsigned char *vmcoreinfo_data;
+static size_t vmcoreinfo_size;
+u32 *vmcoreinfo_note;
+
+/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
+static unsigned char *vmcoreinfo_data_safecopy;
/*
* parsing the "crashkernel" commandline
final_note(buf);
}
+void crash_update_vmcoreinfo_safecopy(void *ptr)
+{
+ if (ptr)
+ memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+
+ vmcoreinfo_data_safecopy = ptr;
+}
+
void crash_save_vmcoreinfo(void)
{
+ if (!vmcoreinfo_note)
+ return;
+
+ /* Use the safe copy to generate vmcoreinfo note if have */
+ if (vmcoreinfo_data_safecopy)
+ vmcoreinfo_data = vmcoreinfo_data_safecopy;
+
vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
update_vmcoreinfo_note();
}
r = vscnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
- r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
+ r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
phys_addr_t __weak paddr_vmcoreinfo_note(void)
{
- return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
+ return __pa(vmcoreinfo_note);
}
static int __init crash_save_vmcoreinfo_init(void)
{
+ vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+ if (!vmcoreinfo_data) {
+ pr_warn("Memory allocation for vmcoreinfo_data failed\n");
+ return -ENOMEM;
+ }
+
+ vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!vmcoreinfo_note) {
+ free_page((unsigned long)vmcoreinfo_data);
+ vmcoreinfo_data = NULL;
+ pr_warn("Memory allocation for vmcoreinfo_note failed\n");
+ return -ENOMEM;
+ }
+
VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
VMCOREINFO_PAGESIZE(PAGE_SIZE);
void *stack;
int i;
- local_irq_disable();
for (i = 0; i < NR_CACHED_STACKS; i++) {
- struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+ struct vm_struct *s;
+
+ s = this_cpu_xchg(cached_stacks[i], NULL);
if (!s)
continue;
- this_cpu_write(cached_stacks[i], NULL);
tsk->stack_vm_area = s;
- local_irq_enable();
return s->addr;
}
- local_irq_enable();
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
VMALLOC_START, VMALLOC_END,
{
#ifdef CONFIG_VMAP_STACK
if (task_stack_vm_area(tsk)) {
- unsigned long flags;
int i;
- local_irq_save(flags);
for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_read(cached_stacks[i]))
+ if (this_cpu_cmpxchg(cached_stacks[i],
+ NULL, tsk->stack_vm_area) != NULL)
continue;
- this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
- local_irq_restore(flags);
return;
}
- local_irq_restore(flags);
vfree_atomic(tsk->stack);
return;
set_task_stack_end_magic(tsk);
#ifdef CONFIG_CC_STACKPROTECTOR
- tsk->stack_canary = get_random_long();
+ tsk->stack_canary = get_random_canary();
#endif
/*
kcov_task_init(tsk);
+#ifdef CONFIG_FAULT_INJECTION
+ tsk->fail_nth = 0;
+#endif
+
return tsk;
free_stack:
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/kcmp.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/eventpoll.h>
+#include <linux/file.h>
#include <asm/unistd.h>
return err;
}
+#ifdef CONFIG_EPOLL
+static int kcmp_epoll_target(struct task_struct *task1,
+ struct task_struct *task2,
+ unsigned long idx1,
+ struct kcmp_epoll_slot __user *uslot)
+{
+ struct file *filp, *filp_epoll, *filp_tgt;
+ struct kcmp_epoll_slot slot;
+ struct files_struct *files;
+
+ if (copy_from_user(&slot, uslot, sizeof(slot)))
+ return -EFAULT;
+
+ filp = get_file_raw_ptr(task1, idx1);
+ if (!filp)
+ return -EBADF;
+
+ files = get_files_struct(task2);
+ if (!files)
+ return -EBADF;
+
+ spin_lock(&files->file_lock);
+ filp_epoll = fcheck_files(files, slot.efd);
+ if (filp_epoll)
+ get_file(filp_epoll);
+ else
+ filp_tgt = ERR_PTR(-EBADF);
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+
+ if (filp_epoll) {
+ filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+ fput(filp_epoll);
+ } else
+
+ if (IS_ERR(filp_tgt))
+ return PTR_ERR(filp_tgt);
+
+ return kcmp_ptr(filp, filp_tgt, KCMP_FILE);
+}
+#else
+static int kcmp_epoll_target(struct task_struct *task1,
+ struct task_struct *task2,
+ unsigned long idx1,
+ struct kcmp_epoll_slot __user *uslot)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
unsigned long, idx1, unsigned long, idx2)
{
ret = -EOPNOTSUPP;
#endif
break;
+ case KCMP_EPOLL_TFD:
+ ret = kcmp_epoll_target(task1, task2, idx1, (void *)idx2);
+ break;
default:
ret = -EINVAL;
break;
if (ret)
goto out;
+ /*
+ * Some architecture(like S390) may touch the crash memory before
+ * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
+ */
+ ret = kimage_crash_copy_vmcoreinfo(image);
+ if (ret)
+ goto out;
+
for (i = 0; i < nr_segments; i++) {
ret = kimage_load_segment(image, &image->segment[i]);
if (ret)
return pages;
}
+int kimage_crash_copy_vmcoreinfo(struct kimage *image)
+{
+ struct page *vmcoreinfo_page;
+ void *safecopy;
+
+ if (image->type != KEXEC_TYPE_CRASH)
+ return 0;
+
+ /*
+ * For kdump, allocate one vmcoreinfo safe copy from the
+ * crash memory. as we have arch_kexec_protect_crashkres()
+ * after kexec syscall, we naturally protect it from write
+ * (even read) access under kernel direct mapping. But on
+ * the other hand, we still need to operate it when crash
+ * happens to generate vmcoreinfo note, hereby we rely on
+ * vmap for this purpose.
+ */
+ vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
+ if (!vmcoreinfo_page) {
+ pr_warn("Could not allocate vmcoreinfo buffer\n");
+ return -ENOMEM;
+ }
+ safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+ if (!safecopy) {
+ pr_warn("Could not vmap vmcoreinfo buffer\n");
+ return -ENOMEM;
+ }
+
+ image->vmcoreinfo_data_copy = safecopy;
+ crash_update_vmcoreinfo_safecopy(safecopy);
+
+ return 0;
+}
+
static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
if (*image->entry != 0)
if (!image)
return;
+ if (image->vmcoreinfo_data_copy) {
+ crash_update_vmcoreinfo_safecopy(NULL);
+ vunmap(image->vmcoreinfo_data_copy);
+ }
+
kimage_free_extra_pages(image);
for_each_kimage_entry(image, ptr, entry) {
if (entry & IND_INDIRECTION) {
#include <linux/vmalloc.h>
#include "kexec_internal.h"
-/*
- * Declare these symbols weak so that if architecture provides a purgatory,
- * these will be overridden.
- */
-char __weak kexec_purgatory[0];
-size_t __weak kexec_purgatory_size = 0;
-
static int kexec_calculate_store_digests(struct kimage *image);
/* Architectures can provide this probe function */
if (ret)
goto out;
+ /*
+ * Some architecture(like S390) may touch the crash memory before
+ * machine_kexec_prepare(), we must copy vmcoreinfo data after it.
+ */
+ ret = kimage_crash_copy_vmcoreinfo(image);
+ if (ret)
+ goto out;
+
ret = kexec_calculate_store_digests(image);
if (ret)
goto out;
#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>
void kimage_file_post_load_cleanup(struct kimage *image);
+extern char kexec_purgatory[];
+extern size_t kexec_purgatory_size;
#else /* CONFIG_KEXEC_FILE */
static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
#endif /* CONFIG_KEXEC_FILE */
{
phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
return sprintf(buf, "%pa %x\n", &vmcore_base,
- (unsigned int)sizeof(vmcoreinfo_note));
+ (unsigned int)VMCOREINFO_NOTE_SIZE);
}
KERNEL_ATTR_RO(vmcoreinfo);
#ifdef CONFIG_PROC_SYSCTL
-#define SYSCTL_WRITES_LEGACY -1
-#define SYSCTL_WRITES_WARN 0
-#define SYSCTL_WRITES_STRICT 1
+/**
+ * enum sysctl_writes_mode - supported sysctl write modes
+ *
+ * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
+ * to be written, and multiple writes on the same sysctl file descriptor
+ * will rewrite the sysctl value, regardless of file position. No warning
+ * is issued when the initial position is not 0.
+ * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
+ * not 0.
+ * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
+ * file position 0 and the value must be fully contained in the buffer
+ * sent to the write syscall. If dealing with strings respect the file
+ * position, but restrict this to the max length of the buffer, anything
+ * passed the max lenght will be ignored. Multiple writes will append
+ * to the buffer.
+ *
+ * These write modes control how current file position affects the behavior of
+ * updating sysctl values through the proc interface on each write.
+ */
+enum sysctl_writes_mode {
+ SYSCTL_WRITES_LEGACY = -1,
+ SYSCTL_WRITES_WARN = 0,
+ SYSCTL_WRITES_STRICT = 1,
+};
-static int sysctl_writes_strict = SYSCTL_WRITES_STRICT;
+static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
.extra2 = &zero,
#endif
},
+ {
+ .procname = "watchdog_cpumask",
+ .data = &watchdog_cpumask_bits,
+ .maxlen = NR_CPUS,
+ .mode = 0644,
+ .proc_handler = proc_watchdog_cpumask,
+ },
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
{
.procname = "soft_watchdog",
.data = &soft_watchdog_enabled,
.extra1 = &zero,
.extra2 = &one,
},
- {
- .procname = "watchdog_cpumask",
- .data = &watchdog_cpumask_bits,
- .maxlen = NR_CPUS,
- .mode = 0644,
- .proc_handler = proc_watchdog_cpumask,
- },
{
.procname = "softlockup_panic",
.data = &softlockup_panic,
.extra1 = &zero,
.extra2 = &one,
},
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_SMP
{
- .procname = "hardlockup_panic",
- .data = &hardlockup_panic,
+ .procname = "softlockup_all_cpu_backtrace",
+ .data = &sysctl_softlockup_all_cpu_backtrace,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
+#endif /* CONFIG_SMP */
#endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
{
- .procname = "softlockup_all_cpu_backtrace",
- .data = &sysctl_softlockup_all_cpu_backtrace,
+ .procname = "hardlockup_panic",
+ .data = &hardlockup_panic,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
+#ifdef CONFIG_SMP
{
.procname = "hardlockup_all_cpu_backtrace",
.data = &sysctl_hardlockup_all_cpu_backtrace,
},
#endif /* CONFIG_SMP */
#endif
+#endif
+
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
.procname = "unknown_nmi_panic",
current->comm, table->procname);
}
+/**
+ * proc_first_pos_non_zero_ignore - check if firs position is allowed
+ * @ppos: file position
+ * @table: the sysctl table
+ *
+ * Returns true if the first position is non-zero and the sysctl_writes_strict
+ * mode indicates this is not allowed for numeric input types. String proc
+ * hadlers can ignore the return value.
+ */
+static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
+ struct ctl_table *table)
+{
+ if (!*ppos)
+ return false;
+
+ switch (sysctl_writes_strict) {
+ case SYSCTL_WRITES_STRICT:
+ return true;
+ case SYSCTL_WRITES_WARN:
+ warn_sysctl_write(table);
+ return false;
+ default:
+ return false;
+ }
+}
+
/**
* proc_dostring - read a string sysctl
* @table: the sysctl table
int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
- warn_sysctl_write(table);
+ if (write)
+ proc_first_pos_non_zero_ignore(ppos, table);
return _proc_do_string((char *)(table->data), table->maxlen, write,
(char __user *)buffer, lenp, ppos);
return 0;
}
-static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
- int *valp,
- int write, void *data)
+static int do_proc_douintvec_conv(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data)
{
if (write) {
- if (*negp)
+ if (*lvalp > UINT_MAX)
return -EINVAL;
if (*lvalp > UINT_MAX)
return -EINVAL;
*valp = *lvalp;
} else {
unsigned int val = *valp;
- *negp = false;
*lvalp = (unsigned long)val;
}
return 0;
conv = do_proc_dointvec_conv;
if (write) {
- if (*ppos) {
- switch (sysctl_writes_strict) {
- case SYSCTL_WRITES_STRICT:
- goto out;
- case SYSCTL_WRITES_WARN:
- warn_sysctl_write(table);
- break;
- default:
- break;
- }
- }
+ if (proc_first_pos_non_zero_ignore(ppos, table))
+ goto out;
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
buffer, lenp, ppos, conv, data);
}
+static int do_proc_douintvec_w(unsigned int *tbl_data,
+ struct ctl_table *table,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
+{
+ unsigned long lval;
+ int err = 0;
+ size_t left;
+ bool neg;
+ char *kbuf = NULL, *p;
+
+ left = *lenp;
+
+ if (proc_first_pos_non_zero_ignore(ppos, table))
+ goto bail_early;
+
+ if (left > PAGE_SIZE - 1)
+ left = PAGE_SIZE - 1;
+
+ p = kbuf = memdup_user_nul(buffer, left);
+ if (IS_ERR(kbuf))
+ return -EINVAL;
+
+ left -= proc_skip_spaces(&p);
+ if (!left) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ err = proc_get_long(&p, &left, &lval, &neg,
+ proc_wspace_sep,
+ sizeof(proc_wspace_sep), NULL);
+ if (err || neg) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ if (conv(&lval, tbl_data, 1, data)) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ if (!err && left)
+ left -= proc_skip_spaces(&p);
+
+out_free:
+ kfree(kbuf);
+ if (err)
+ return -EINVAL;
+
+ return 0;
+
+ /* This is in keeping with old __do_proc_dointvec() */
+bail_early:
+ *ppos += *lenp;
+ return err;
+}
+
+static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
+{
+ unsigned long lval;
+ int err = 0;
+ size_t left;
+
+ left = *lenp;
+
+ if (conv(&lval, tbl_data, 0, data)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = proc_put_long(&buffer, &left, lval, false);
+ if (err || !left)
+ goto out;
+
+ err = proc_put_char(&buffer, &left, '\n');
+
+out:
+ *lenp -= left;
+ *ppos += *lenp;
+
+ return err;
+}
+
+static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
+ int write, void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
+{
+ unsigned int *i, vleft;
+
+ if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (unsigned int *) tbl_data;
+ vleft = table->maxlen / sizeof(*i);
+
+ /*
+ * Arrays are not supported, keep this simple. *Do not* add
+ * support for them.
+ */
+ if (vleft != 1) {
+ *lenp = 0;
+ return -EINVAL;
+ }
+
+ if (!conv)
+ conv = do_proc_douintvec_conv;
+
+ if (write)
+ return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
+ conv, data);
+ return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
+}
+
+static int do_proc_douintvec(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
+{
+ return __do_proc_douintvec(table->data, table, write,
+ buffer, lenp, ppos, conv, data);
+}
+
/**
* proc_dointvec - read a vector of integers
* @table: the sysctl table
int proc_douintvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table, write, buffer, lenp, ppos,
- do_proc_douintvec_conv, NULL);
+ return do_proc_douintvec(table, write, buffer, lenp, ppos,
+ do_proc_douintvec_conv, NULL);
}
/*
do_proc_dointvec_minmax_conv, ¶m);
}
+struct do_proc_douintvec_minmax_conv_param {
+ unsigned int *min;
+ unsigned int *max;
+};
+
+static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data)
+{
+ struct do_proc_douintvec_minmax_conv_param *param = data;
+
+ if (write) {
+ unsigned int val = *lvalp;
+
+ if ((param->min && *param->min > val) ||
+ (param->max && *param->max < val))
+ return -ERANGE;
+
+ if (*lvalp > UINT_MAX)
+ return -EINVAL;
+ *valp = val;
+ } else {
+ unsigned int val = *valp;
+ *lvalp = (unsigned long) val;
+ }
+
+ return 0;
+}
+
+/**
+ * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string. Negative
+ * strings are not allowed.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max). There is a final sanity
+ * check for UINT_MAX to avoid having to support wrap around uses from
+ * userspace.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct do_proc_douintvec_minmax_conv_param param = {
+ .min = (unsigned int *) table->extra1,
+ .max = (unsigned int *) table->extra2,
+ };
+ return do_proc_douintvec(table, write, buffer, lenp, ppos,
+ do_proc_douintvec_minmax_conv, ¶m);
+}
+
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
left = *lenp;
if (write) {
- if (*ppos) {
- switch (sysctl_writes_strict) {
- case SYSCTL_WRITES_STRICT:
- goto out;
- case SYSCTL_WRITES_WARN:
- warn_sysctl_write(table);
- break;
- default:
- break;
- }
- }
+ if (proc_first_pos_non_zero_ignore(ppos, table))
+ goto out;
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
return -ENOSYS;
}
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
int proc_dointvec_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
* CTL_KERN/KERN_VERSION is used by older glibc and cannot
* ever go away.
*/
- if (name[0] == CTL_KERN && name[1] == KERN_VERSION)
+ if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
return;
if (printk_ratelimit()) {
#include <linux/kvm_para.h>
#include <linux/kthread.h>
+/* Watchdog configuration */
static DEFINE_MUTEX(watchdog_proc_mutex);
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+int __read_mostly nmi_watchdog_enabled;
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
+ NMI_WATCHDOG_ENABLED;
#else
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
-int __read_mostly nmi_watchdog_enabled;
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+ if (!strncmp(str, "panic", 5))
+ hardlockup_panic = 1;
+ else if (!strncmp(str, "nopanic", 7))
+ hardlockup_panic = 0;
+ else if (!strncmp(str, "0", 1))
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+ else if (!strncmp(str, "1", 1))
+ watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+ return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
int __read_mostly soft_watchdog_enabled;
+#endif
+
int __read_mostly watchdog_user_enabled;
int __read_mostly watchdog_thresh = 10;
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#endif
-static struct cpumask watchdog_cpumask __read_mostly;
+struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
- for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
-
/*
* The 'watchdog_running' variable is set to 1 when the watchdog threads
* are registered/started and is set to 0 when the watchdog threads are
* of 'watchdog_running' cannot change while the watchdog is deactivated
* temporarily (see related code in 'proc' handlers).
*/
-static int __read_mostly watchdog_suspended;
+int __read_mostly watchdog_suspended;
+
+/*
+ * These functions can be overridden if an architecture implements its
+ * own hardlockup detector.
+ *
+ * watchdog_nmi_enable/disable can be implemented to start and stop when
+ * softlockup watchdog threads start and stop. The arch must select the
+ * SOFTLOCKUP_DETECTOR Kconfig.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+ return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
+
+/*
+ * watchdog_nmi_reconfigure can be implemented to be notified after any
+ * watchdog configuration change. The arch hardlockup watchdog should
+ * respond to the following variables:
+ * - nmi_watchdog_enabled
+ * - watchdog_thresh
+ * - watchdog_cpumask
+ * - sysctl_hardlockup_all_cpu_backtrace
+ * - hardlockup_panic
+ * - watchdog_suspended
+ */
+void __weak watchdog_nmi_reconfigure(void)
+{
+}
+
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
static u64 __read_mostly sample_period;
return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
sysctl_hardlockup_all_cpu_backtrace =
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
#endif
+#endif
/*
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
__this_cpu_write(watchdog_touch_ts, 0);
}
-/* watchdog detector functions */
-bool is_hardlockup(void)
-{
- unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
- if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
- return true;
-
- __this_cpu_write(hrtimer_interrupts_saved, hrint);
- return false;
-}
-
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
return 0;
}
-static void watchdog_interrupt_count(void)
+/* watchdog detector functions */
+bool is_hardlockup(void)
{
- __this_cpu_inc(hrtimer_interrupts);
-}
+ unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-/*
- * These two functions are mostly architecture specific
- * defining them as weak here.
- */
-int __weak watchdog_nmi_enable(unsigned int cpu)
-{
- return 0;
+ if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+ return true;
+
+ __this_cpu_write(hrtimer_interrupts_saved, hrint);
+ return false;
}
-void __weak watchdog_nmi_disable(unsigned int cpu)
+
+static void watchdog_interrupt_count(void)
{
+ __this_cpu_inc(hrtimer_interrupts);
}
static int watchdog_enable_all_cpus(void);
kthread_unpark(per_cpu(softlockup_watchdog, cpu));
}
-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
- */
-int lockup_detector_suspend(void)
-{
- int ret = 0;
-
- get_online_cpus();
- mutex_lock(&watchdog_proc_mutex);
- /*
- * Multiple suspend requests can be active in parallel (counted by
- * the 'watchdog_suspended' variable). If the watchdog threads are
- * running, the first caller takes care that they will be parked.
- * The state of 'watchdog_running' cannot change while a suspend
- * request is active (see related code in 'proc' handlers).
- */
- if (watchdog_running && !watchdog_suspended)
- ret = watchdog_park_threads();
-
- if (ret == 0)
- watchdog_suspended++;
- else {
- watchdog_disable_all_cpus();
- pr_err("Failed to suspend lockup detectors, disabled\n");
- watchdog_enabled = 0;
- }
-
- mutex_unlock(&watchdog_proc_mutex);
-
- return ret;
-}
-
-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
- */
-void lockup_detector_resume(void)
-{
- mutex_lock(&watchdog_proc_mutex);
-
- watchdog_suspended--;
- /*
- * The watchdog threads are unparked if they were previously running
- * and if there is no more active suspend request.
- */
- if (watchdog_running && !watchdog_suspended)
- watchdog_unpark_threads();
-
- mutex_unlock(&watchdog_proc_mutex);
- put_online_cpus();
-}
-
static int update_watchdog_all_cpus(void)
{
int ret;
}
}
+#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+ return smpboot_update_cpumask_percpu_thread(
+ &watchdog_threads, &watchdog_cpumask);
+}
+#endif
+
+#else /* SOFTLOCKUP */
+static int watchdog_park_threads(void)
+{
+ return 0;
+}
+
+static void watchdog_unpark_threads(void)
+{
+}
+
+static int watchdog_enable_all_cpus(void)
+{
+ return 0;
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+}
+
+#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+ return 0;
+}
+#endif
+
+static void set_sample_period(void)
+{
+}
+#endif /* SOFTLOCKUP */
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ */
+int lockup_detector_suspend(void)
+{
+ int ret = 0;
+
+ get_online_cpus();
+ mutex_lock(&watchdog_proc_mutex);
+ /*
+ * Multiple suspend requests can be active in parallel (counted by
+ * the 'watchdog_suspended' variable). If the watchdog threads are
+ * running, the first caller takes care that they will be parked.
+ * The state of 'watchdog_running' cannot change while a suspend
+ * request is active (see related code in 'proc' handlers).
+ */
+ if (watchdog_running && !watchdog_suspended)
+ ret = watchdog_park_threads();
+
+ if (ret == 0)
+ watchdog_suspended++;
+ else {
+ watchdog_disable_all_cpus();
+ pr_err("Failed to suspend lockup detectors, disabled\n");
+ watchdog_enabled = 0;
+ }
+
+ watchdog_nmi_reconfigure();
+
+ mutex_unlock(&watchdog_proc_mutex);
+
+ return ret;
+}
+
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ */
+void lockup_detector_resume(void)
+{
+ mutex_lock(&watchdog_proc_mutex);
+
+ watchdog_suspended--;
+ /*
+ * The watchdog threads are unparked if they were previously running
+ * and if there is no more active suspend request.
+ */
+ if (watchdog_running && !watchdog_suspended)
+ watchdog_unpark_threads();
+
+ watchdog_nmi_reconfigure();
+
+ mutex_unlock(&watchdog_proc_mutex);
+ put_online_cpus();
+}
+
#ifdef CONFIG_SYSCTL
/*
else
watchdog_disable_all_cpus();
+ watchdog_nmi_reconfigure();
+
return err;
}
* a temporary cpumask, so we are likely not in a
* position to do much else to make things better.
*/
- if (smpboot_update_cpumask_percpu_thread(
- &watchdog_threads, &watchdog_cpumask) != 0)
+ if (watchdog_update_cpus() != 0)
pr_err("cpumask update failed\n");
}
+
+ watchdog_nmi_reconfigure();
}
out:
mutex_unlock(&watchdog_proc_mutex);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-unsigned int __read_mostly hardlockup_panic =
- CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
- if (!strncmp(str, "panic", 5))
- hardlockup_panic = 1;
- else if (!strncmp(str, "nopanic", 7))
- hardlockup_panic = 0;
- else if (!strncmp(str, "0", 1))
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
- else if (!strncmp(str, "1", 1))
- watchdog_enabled |= NMI_WATCHDOG_ENABLED;
- return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
{
/*
* Using __raw here because some code paths have
* going off.
*/
raw_cpu_write(watchdog_nmi_touch, true);
- touch_softlockup_watchdog();
}
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
menu "Debug Lockups and Hangs"
config LOCKUP_DETECTOR
- bool "Detect Hard and Soft Lockups"
+ bool
+
+config SOFTLOCKUP_DETECTOR
+ bool "Detect Soft Lockups"
depends on DEBUG_KERNEL && !S390
+ select LOCKUP_DETECTOR
help
Say Y here to enable the kernel to act as a watchdog to detect
- hard and soft lockups.
+ soft lockups.
Softlockups are bugs that cause the kernel to loop in kernel
mode for more than 20 seconds, without giving other tasks a
chance to run. The current stack trace is displayed upon
detection and the system will stay locked up.
+config HARDLOCKUP_DETECTOR_PERF
+ bool
+ select SOFTLOCKUP_DETECTOR
+
+#
+# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
+# lockup detector rather than the perf based detector.
+#
+config HARDLOCKUP_DETECTOR
+ bool "Detect Hard Lockups"
+ depends on DEBUG_KERNEL && !S390
+ depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
+ select LOCKUP_DETECTOR
+ select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
+ select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
+ help
+ Say Y here to enable the kernel to act as a watchdog to detect
+ hard lockups.
+
Hardlockups are bugs that cause the CPU to loop in kernel mode
for more than 10 seconds, without letting other interrupts have a
chance to run. The current stack trace is displayed upon detection
and the system will stay locked up.
- The overhead should be minimal. A periodic hrtimer runs to
- generate interrupts and kick the watchdog task every 4 seconds.
- An NMI is generated every 10 seconds or so to check for hardlockups.
-
- The frequency of hrtimer and NMI events and the soft and hard lockup
- thresholds can be controlled through the sysctl watchdog_thresh.
-
-config HARDLOCKUP_DETECTOR
- def_bool y
- depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
- depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
-
config BOOTPARAM_HARDLOCKUP_PANIC
bool "Panic (Reboot) On Hard Lockups"
depends on HARDLOCKUP_DETECTOR
config BOOTPARAM_SOFTLOCKUP_PANIC
bool "Panic (Reboot) On Soft Lockups"
- depends on LOCKUP_DETECTOR
+ depends on SOFTLOCKUP_DETECTOR
help
Say Y here to enable the kernel to panic on "soft lockups",
which are bugs that cause the kernel to loop in kernel
config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
int
- depends on LOCKUP_DETECTOR
+ depends on SOFTLOCKUP_DETECTOR
range 0 1
default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
config DETECT_HUNG_TASK
bool "Detect Hung Tasks"
depends on DEBUG_KERNEL
- default LOCKUP_DETECTOR
+ default SOFTLOCKUP_DETECTOR
help
Say Y here to enable the kernel to detect "hung tasks",
which are bugs that cause the task to be stuck in
If unsure, say N.
+config TEST_SYSCTL
+ tristate "sysctl test driver"
+ default n
+ depends on PROC_SYSCTL
+ help
+ This builds the "test_sysctl" module. This driver enables to test the
+ proc sysctl interfaces available to drivers safely without affecting
+ production knobs which might alter system functionality.
+
+ If unsure, say N.
+
config TEST_UDELAY
tristate "udelay test driver"
default n
obj-y += kstrtox.o
obj-$(CONFIG_TEST_BPF) += test_bpf.o
obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
obj-$(CONFIG_TEST_KASAN) += test_kasan.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
bool should_fail(struct fault_attr *attr, ssize_t size)
{
+ if (in_task() && current->fail_nth) {
+ if (--current->fail_nth == 0)
+ goto fail;
+ return false;
+ }
+
/* No need to check any other properties if the probability is 0 */
if (attr->probability == 0)
return false;
if (!fail_stacktrace(attr))
return false;
+fail:
fail_dump(attr);
if (atomic_read(&attr->times) != -1)
}
EXPORT_SYMBOL(percpu_counter_set);
+/**
+ * This function is both preempt and irq safe. The former is due to explicit
+ * preemption disable. The latter is guaranteed by the fact that the slow path
+ * is explicitly protected by an irq-safe spinlock whereas the fast patch uses
+ * this_cpu_add which is irq-safe by definition. Hence there is no need muck
+ * with irq state before calling this one
+ */
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
s64 count;
return s;
}
EXPORT_SYMBOL(strreplace);
+
+void fortify_panic(const char *name)
+{
+ pr_emerg("detected buffer overflow in %s\n", name);
+ BUG();
+}
+EXPORT_SYMBOL(fortify_panic);
--- /dev/null
+/*
+ * proc sysctl test driver
+ *
+ * Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or at your option any
+ * later version; or, when distributed separately from the Linux kernel or
+ * when incorporated into other software packages, subject to the following
+ * license:
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of copyleft-next (version 0.3.1 or later) as published
+ * at http://copyleft-next.org/.
+ */
+
+/*
+ * This module provides an interface to the the proc sysctl interfaces. This
+ * driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the
+ * system unless explicitly requested by name. You can also build this driver
+ * into your kernel.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/async.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+
+static int i_zero;
+static int i_one_hundred = 100;
+
+struct test_sysctl_data {
+ int int_0001;
+ int int_0002;
+ int int_0003[4];
+
+ unsigned int uint_0001;
+
+ char string_0001[65];
+};
+
+static struct test_sysctl_data test_data = {
+ .int_0001 = 60,
+ .int_0002 = 1,
+
+ .int_0003[0] = 0,
+ .int_0003[1] = 1,
+ .int_0003[2] = 2,
+ .int_0003[3] = 3,
+
+ .uint_0001 = 314,
+
+ .string_0001 = "(none)",
+};
+
+/* These are all under /proc/sys/debug/test_sysctl/ */
+static struct ctl_table test_table[] = {
+ {
+ .procname = "int_0001",
+ .data = &test_data.int_0001,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ },
+ {
+ .procname = "int_0002",
+ .data = &test_data.int_0002,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "int_0003",
+ .data = &test_data.int_0003,
+ .maxlen = sizeof(test_data.int_0003),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "uint_0001",
+ .data = &test_data.uint_0001,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec,
+ },
+ {
+ .procname = "string_0001",
+ .data = &test_data.string_0001,
+ .maxlen = sizeof(test_data.string_0001),
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ { }
+};
+
+static struct ctl_table test_sysctl_table[] = {
+ {
+ .procname = "test_sysctl",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = test_table,
+ },
+ { }
+};
+
+static struct ctl_table test_sysctl_root_table[] = {
+ {
+ .procname = "debug",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = test_sysctl_table,
+ },
+ { }
+};
+
+static struct ctl_table_header *test_sysctl_header;
+
+static int __init test_sysctl_init(void)
+{
+ test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
+ if (!test_sysctl_header)
+ return -ENOMEM;
+ return 0;
+}
+late_initcall(test_sysctl_init);
+
+static void __exit test_sysctl_exit(void)
+{
+ if (test_sysctl_header)
+ unregister_sysctl_table(test_sysctl_header);
+}
+
+module_exit(test_sysctl_exit);
+
+MODULE_AUTHOR("Luis R. Rodriguez <mcgrof@kernel.org>");
+MODULE_LICENSE("GPL");
page = __alloc_pages_node(nid,
htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
- __GFP_REPEAT|__GFP_NOWARN,
+ __GFP_RETRY_MAYFAIL|__GFP_NOWARN,
huge_page_order(h));
if (page) {
prep_new_huge_page(h, page, nid);
{
int order = huge_page_order(h);
- gfp_mask |= __GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+ gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
if (nid == NUMA_NO_NODE)
nid = numa_mem_id();
return __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
* hints such as HIGHMEM usage.
*/
#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
- __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
+ __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
__GFP_ATOMIC)
return 0;
}
-static int create_huge_pmd(struct vm_fault *vmf)
+static inline int create_huge_pmd(struct vm_fault *vmf)
{
if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_anonymous_page(vmf);
/*
* if !vma, alloc_page_vma() will use task or system default policy
*/
- return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+ return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL,
+ vma, address);
}
#else
{
struct wb_domain *cgdom;
- __inc_wb_stat(wb, WB_WRITTEN);
+ inc_wb_stat(wb, WB_WRITTEN);
wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
wb->bdi->max_prop_frac);
__inc_lruvec_page_state(page, NR_FILE_DIRTY);
__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
__inc_node_page_state(page, NR_DIRTIED);
- __inc_wb_stat(wb, WB_RECLAIMABLE);
- __inc_wb_stat(wb, WB_DIRTIED);
+ inc_wb_stat(wb, WB_RECLAIMABLE);
+ inc_wb_stat(wb, WB_DIRTIED);
task_io_account_write(PAGE_SIZE);
current->nr_dirtied++;
this_cpu_inc(bdp_ratelimits);
if (bdi_cap_account_writeback(bdi)) {
struct bdi_writeback *wb = inode_to_wb(inode);
- __dec_wb_stat(wb, WB_WRITEBACK);
+ dec_wb_stat(wb, WB_WRITEBACK);
__wb_writeout_inc(wb);
}
}
page_index(page),
PAGECACHE_TAG_WRITEBACK);
if (bdi_cap_account_writeback(bdi))
- __inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+ inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
/*
* We can come through here when swapping anonymous
/* The OOM killer will not help higher order allocs */
if (order > PAGE_ALLOC_COSTLY_ORDER)
goto out;
+ /*
+ * We have already exhausted all our reclaim opportunities without any
+ * success so it is time to admit defeat. We will skip the OOM killer
+ * because it is very likely that the caller has a more reasonable
+ * fallback than shooting a random task.
+ */
+ if (gfp_mask & __GFP_RETRY_MAYFAIL)
+ goto out;
/* The OOM killer does not needlessly kill tasks for lowmem */
if (ac->high_zoneidx < ZONE_NORMAL)
goto out;
}
/*
- * !costly requests are much more important than __GFP_REPEAT
+ * !costly requests are much more important than __GFP_RETRY_MAYFAIL
* costly ones because they are de facto nofail and invoke OOM
* killer to move on while costly can fail and users are ready
* to cope with that. 1/4 retries is rather arbitrary but we
/*
* Do not retry costly high order allocations unless they are
- * __GFP_REPEAT
+ * __GFP_RETRY_MAYFAIL
*/
- if (costly_order && !(gfp_mask & __GFP_REPEAT))
+ if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
goto nopage;
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
if (node_state(node, N_HIGH_MEMORY))
page = alloc_pages_node(
- node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+ node, GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
get_order(size));
else
page = alloc_pages(
- GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+ GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
get_order(size));
if (page)
return page_address(page);
* Uses kmalloc to get the memory but if the allocation fails then falls back
* to the vmalloc allocator. Use kvfree for freeing the memory.
*
- * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT
- * is supported only for large (>32kB) allocations, and it should be used only if
- * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks.
+ * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
+ * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
+ * preferable to the vmalloc fallback, due to visible performance drawbacks.
*
* Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people.
*/
if (size > PAGE_SIZE) {
kmalloc_flags |= __GFP_NOWARN;
- /*
- * We have to override __GFP_REPEAT by __GFP_NORETRY for !costly
- * requests because there is no other way to tell the allocator
- * that we want to fail rather than retry endlessly.
- */
- if (!(kmalloc_flags & __GFP_REPEAT) ||
- (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
kmalloc_flags |= __GFP_NORETRY;
}
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
*
- * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT
+ * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
* and __GFP_NOFAIL are not supported
*
* Any use of gfp flags outside of GFP_KERNEL should be consulted
return false;
/* Consider stopping depending on scan and reclaim activity */
- if (sc->gfp_mask & __GFP_REPEAT) {
+ if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
/*
- * For __GFP_REPEAT allocations, stop reclaiming if the
+ * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
* full LRU list has been scanned and we are still failing
* to reclaim pages. This full LRU scan is potentially
- * expensive but a __GFP_REPEAT caller really wants to succeed
+ * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
*/
if (!nr_reclaimed && !nr_scanned)
return false;
} else {
/*
- * For non-__GFP_REPEAT allocations which can presumably
+ * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
* fail without consequence, stop if we failed to reclaim
* any pages from the last SWAP_CLUSTER_MAX number of
* pages that were scanned. This will return to the
BUG_ON(count < 1);
- rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+ rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!rx)
return -ENOMEM;
if (count < 1 || count > 0xffff)
return -EINVAL;
- tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+ tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!tx)
return -ENOMEM;
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
- p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT);
+ p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!p)
return NULL;
gfp_head = gfp_mask;
if (gfp_head & __GFP_DIRECT_RECLAIM)
- gfp_head |= __GFP_REPEAT;
+ gfp_head |= __GFP_RETRY_MAYFAIL;
*errcode = -ENOBUFS;
skb = alloc_skb(header_len, gfp_head);
if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
return NULL;
- if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
- info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
- if (!info) {
- info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
- PAGE_KERNEL);
- if (!info)
- return NULL;
- }
+ info = kvmalloc(sz, GFP_KERNEL);
+ if (!info)
+ return NULL;
+
memset(info, 0, sizeof(*info));
info->size = size;
return info;
return 0;
/* If XPS was setup, we can allocate memory on right NUMA node */
- array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT,
+ array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL,
netdev_queue_numa_node_read(sch->dev_queue));
if (!array)
return -ENOMEM;
for (i = 0; i < nents; i++) {
printk(KERN_INFO
"sg[%d] -> "
- "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
- i, sg[i].page_link, sg[i].offset, sg[i].length);
+ "page %p offset 0x%.8x length 0x%.8x\n",
+ i, sg_page(&sg[i]), sg[i].offset, sg[i].length);
if (sg_is_last(&sg[i]))
break;
for (i = 0; i < nents; i++) {
printk(KERN_INFO
"sg[%d] -> "
- "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
- i, sg[i].page_link, sg[i].offset, sg[i].length);
+ "page %p offset 0x%.8x length 0x%.8x\n",
+ i, sg_page(&sg[i]), sg[i].offset, sg[i].length);
if (sg_is_last(&sg[i]))
break;
"architecture specific defines should be avoided\n" . $herecurr);
}
+# check that the storage class is not after a type
+ if ($line =~ /\b($Type)\s+($Storage)\b/) {
+ WARN("STORAGE_CLASS",
+ "storage class '$2' should be located before type '$1'\n" . $herecurr);
+ }
# Check that the storage class is at the beginning of a declaration
- if ($line =~ /\b$Storage\b/ && $line !~ /^.\s*$Storage\b/) {
+ if ($line =~ /\b$Storage\b/ &&
+ $line !~ /^.\s*$Storage/ &&
+ $line =~ /^.\s*(.+?)\$Storage\s/ &&
+ $1 !~ /[\,\)]\s*$/) {
WARN("STORAGE_CLASS",
- "storage class should be at the beginning of the declaration\n" . $herecurr)
+ "storage class should be at the beginning of the declaration\n" . $herecurr);
}
# check the location of the inline attribute, that it is between
#include <linux/fs.h>
#include <linux/mount.h>
+#include <linux/of_fdt.h>
/* We need to stringify expanded macros so that they can be parsed */
LX_VALUE(MNT_NOATIME)
LX_VALUE(MNT_NODIRATIME)
LX_VALUE(MNT_RELATIME)
+
+/* linux/of_fdt.h> */
+LX_VALUE(OF_DT_HEADER)
+
+/* Kernel Configs */
+LX_CONFIG(CONFIG_OF)
#
import gdb
+import sys
from linux import utils
def invoke(self, arg, from_tty):
log_buf_addr = int(str(gdb.parse_and_eval(
- "'printk.c'::log_buf")).split()[0], 16)
+ "(void *)'printk.c'::log_buf")).split()[0], 16)
log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx"))
log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx"))
log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len"))
continue
text_len = utils.read_u16(log_buf[pos + 10:pos + 12])
- text = log_buf[pos + 16:pos + 16 + text_len].decode()
+ text = log_buf[pos + 16:pos + 16 + text_len].decode(
+ encoding='utf8', errors='replace')
time_stamp = utils.read_u64(log_buf[pos:pos + 8])
for line in text.splitlines():
- gdb.write("[{time:12.6f}] {line}\n".format(
+ msg = u"[{time:12.6f}] {line}\n".format(
time=time_stamp / 1000000000.0,
- line=line))
+ line=line)
+ # With python2 gdb.write will attempt to convert unicode to
+ # ascii and might fail so pass an utf8-encoded str instead.
+ if sys.hexversion < 0x03000000:
+ msg = msg.encode(encoding='utf8', errors='replace')
+ gdb.write(msg)
pos += length
from linux import utils
from linux import tasks
from linux import lists
+from struct import *
class LxCmdLine(gdb.Command):
info_opts(MNT_INFO, m_flags)))
LxMounts()
+
+
+class LxFdtDump(gdb.Command):
+ """Output Flattened Device Tree header and dump FDT blob to the filename
+ specified as the command argument. Equivalent to
+ 'cat /proc/fdt > fdtdump.dtb' on a running target"""
+
+ def __init__(self):
+ super(LxFdtDump, self).__init__("lx-fdtdump", gdb.COMMAND_DATA,
+ gdb.COMPLETE_FILENAME)
+
+ def fdthdr_to_cpu(self, fdt_header):
+
+ fdt_header_be = ">IIIIIII"
+ fdt_header_le = "<IIIIIII"
+
+ if utils.get_target_endianness() == 1:
+ output_fmt = fdt_header_le
+ else:
+ output_fmt = fdt_header_be
+
+ return unpack(output_fmt, pack(fdt_header_be,
+ fdt_header['magic'],
+ fdt_header['totalsize'],
+ fdt_header['off_dt_struct'],
+ fdt_header['off_dt_strings'],
+ fdt_header['off_mem_rsvmap'],
+ fdt_header['version'],
+ fdt_header['last_comp_version']))
+
+ def invoke(self, arg, from_tty):
+
+ if not constants.LX_CONFIG_OF:
+ raise gdb.GdbError("Kernel not compiled with CONFIG_OF\n")
+
+ if len(arg) == 0:
+ filename = "fdtdump.dtb"
+ else:
+ filename = arg
+
+ py_fdt_header_ptr = gdb.parse_and_eval(
+ "(const struct fdt_header *) initial_boot_params")
+ py_fdt_header = py_fdt_header_ptr.dereference()
+
+ fdt_header = self.fdthdr_to_cpu(py_fdt_header)
+
+ if fdt_header[0] != constants.LX_OF_DT_HEADER:
+ raise gdb.GdbError("No flattened device tree magic found\n")
+
+ gdb.write("fdt_magic: 0x{:02X}\n".format(fdt_header[0]))
+ gdb.write("fdt_totalsize: 0x{:02X}\n".format(fdt_header[1]))
+ gdb.write("off_dt_struct: 0x{:02X}\n".format(fdt_header[2]))
+ gdb.write("off_dt_strings: 0x{:02X}\n".format(fdt_header[3]))
+ gdb.write("off_mem_rsvmap: 0x{:02X}\n".format(fdt_header[4]))
+ gdb.write("version: {}\n".format(fdt_header[5]))
+ gdb.write("last_comp_version: {}\n".format(fdt_header[6]))
+
+ inf = gdb.inferiors()[0]
+ fdt_buf = utils.read_memoryview(inf, py_fdt_header_ptr,
+ fdt_header[1]).tobytes()
+
+ try:
+ f = open(filename, 'wb')
+ except:
+ raise gdb.GdbError("Could not open file to dump fdt")
+
+ f.write(fdt_buf)
+ f.close()
+
+ gdb.write("Dumped fdt blob to " + filename + "\n")
+
+LxFdtDump()
been removed. This config is intended to be used only while
trying to find such users.
+config FORTIFY_SOURCE
+ bool "Harden common str/mem functions against buffer overflows"
+ depends on ARCH_HAS_FORTIFY_SOURCE
+ help
+ Detect overflows of buffers in common string and memory functions
+ where the compiler can determine and validate the buffer sizes.
+
config STATIC_USERMODEHELPER
bool "Force all usermode helper calls through a single binary"
help
{ "__GFP_FS", "F" },
{ "__GFP_COLD", "CO" },
{ "__GFP_NOWARN", "NWR" },
- { "__GFP_REPEAT", "R" },
+ { "__GFP_RETRY_MAYFAIL", "R" },
{ "__GFP_NOFAIL", "NF" },
{ "__GFP_NORETRY", "NR" },
{ "__GFP_COMP", "C" },
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests".