9c78e06f7ba4a0c451879d64404337075afa8207
[muen/linux.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/sched/coredump.h>
66 #include <linux/kexec.h>
67 #include <linux/bpf.h>
68 #include <linux/mount.h>
69
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_BSD_PROCESS_ACCT
82 #include <linux/acct.h>
83 #endif
84 #ifdef CONFIG_RT_MUTEXES
85 #include <linux/rtmutex.h>
86 #endif
87 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
88 #include <linux/lockdep.h>
89 #endif
90 #ifdef CONFIG_CHR_DEV_SG
91 #include <scsi/sg.h>
92 #endif
93 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
94 #include <linux/stackleak.h>
95 #endif
96 #ifdef CONFIG_LOCKUP_DETECTOR
97 #include <linux/nmi.h>
98 #endif
99
100 #if defined(CONFIG_SYSCTL)
101
/* Variables defined elsewhere for which no header carries a declaration. */
extern int suid_dumpable;
#if defined(CONFIG_COREDUMP)
extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif
extern int pid_max;
extern int pid_max_min;
extern int pid_max_max;
extern int percpu_pagelist_fraction;
extern int latencytop_enabled;
extern unsigned int sysctl_nr_open_min;
extern unsigned int sysctl_nr_open_max;
#if !defined(CONFIG_MMU)
extern int sysctl_nr_trim_pages;
#endif
117
118 /* Constants used for minimum and  maximum */
119 #ifdef CONFIG_LOCKUP_DETECTOR
120 static int sixty = 60;
121 #endif
122
123 static int __maybe_unused neg_one = -1;
124
125 static int zero;
126 static int __maybe_unused one = 1;
127 static int __maybe_unused two = 2;
128 static int __maybe_unused four = 4;
129 static unsigned long one_ul = 1;
130 static int one_hundred = 100;
131 static int one_thousand = 1000;
132 #ifdef CONFIG_PRINTK
133 static int ten_thousand = 10000;
134 #endif
135 #ifdef CONFIG_PERF_EVENTS
136 static int six_hundred_forty_kb = 640 * 1024;
137 #endif
138
139 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
140 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
141
142 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
143 static int maxolduid = 65535;
144 static int minolduid;
145
146 static int ngroups_max = NGROUPS_MAX;
147 static const int cap_last_cap = CAP_LAST_CAP;
148
149 /*
150  * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
151  * and hung_task_check_interval_secs
152  */
153 #ifdef CONFIG_DETECT_HUNG_TASK
154 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
155 #endif
156
157 #ifdef CONFIG_INOTIFY_USER
158 #include <linux/inotify.h>
159 #endif
/*
 * Architecture- and option-specific variables defined elsewhere; each
 * declaration is only visible when its guarding symbol is defined.
 * (An empty CONFIG_SPARC conditional that guarded nothing was dropped here.)
 */
#ifdef __hppa__
extern int pwrsw_enabled;
#endif

#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
extern int unaligned_enabled;
#endif

#ifdef CONFIG_IA64
extern int unaligned_dump_stack;
#endif

#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
extern int no_unaligned_warning;
#endif
178
179 #ifdef CONFIG_PROC_SYSCTL
180
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall has to carry the complete
 *	sysctl value; repeated writes through the same file descriptor
 *	overwrite the value whatever the file position is, and no warning is
 *	issued when the initial position is not 0.
 * @SYSCTL_WRITES_WARN: behaves like the legacy mode, but warns when the
 *	initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: numeric sysctl entries must always be written at
 *	file position 0, with the value fully contained in the buffer handed
 *	to the write syscall. For strings the file position is respected, but
 *	only up to the max length of the buffer: anything past the max length
 *	is ignored. Multiple writes append to the buffer.
 *
 * The write mode decides how the current file position affects updates of
 * sysctl values made through the proc interface on each write.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY	= -1,
	SYSCTL_WRITES_WARN	= 0,
	SYSCTL_WRITES_STRICT	= 1,
};

/* Write mode in effect; starts out strict. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
207
208 static int proc_do_cad_pid(struct ctl_table *table, int write,
209                   void __user *buffer, size_t *lenp, loff_t *ppos);
210 static int proc_taint(struct ctl_table *table, int write,
211                                void __user *buffer, size_t *lenp, loff_t *ppos);
212 #endif
213
214 #ifdef CONFIG_PRINTK
215 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
216                                 void __user *buffer, size_t *lenp, loff_t *ppos);
217 #endif
218
219 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
220                 void __user *buffer, size_t *lenp, loff_t *ppos);
221 #ifdef CONFIG_COREDUMP
222 static int proc_dostring_coredump(struct ctl_table *table, int write,
223                 void __user *buffer, size_t *lenp, loff_t *ppos);
224 #endif
225 static int proc_dopipe_max_size(struct ctl_table *table, int write,
226                 void __user *buffer, size_t *lenp, loff_t *ppos);
227 #ifdef CONFIG_BPF_SYSCALL
228 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
229                                           void __user *buffer, size_t *lenp,
230                                           loff_t *ppos);
231 #endif
232
233 #ifdef CONFIG_MAGIC_SYSRQ
234 /* Note: sysrq code uses its own private copy */
235 static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
236
237 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
238                                 void __user *buffer, size_t *lenp,
239                                 loff_t *ppos)
240 {
241         int error;
242
243         error = proc_dointvec(table, write, buffer, lenp, ppos);
244         if (error)
245                 return error;
246
247         if (write)
248                 sysrq_toggle_support(__sysrq_enabled);
249
250         return 0;
251 }
252
253 #endif
254
255 static struct ctl_table kern_table[];
256 static struct ctl_table vm_table[];
257 static struct ctl_table fs_table[];
258 static struct ctl_table debug_table[];
259 static struct ctl_table dev_table[];
260 extern struct ctl_table random_table[];
261 #ifdef CONFIG_EPOLL
262 extern struct ctl_table epoll_table[];
263 #endif
264
265 #ifdef CONFIG_FW_LOADER_USER_HELPER
266 extern struct ctl_table firmware_config_table[];
267 #endif
268
269 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
270 int sysctl_legacy_va_layout;
271 #endif
272
273 /* The default sysctl tables: */
274
275 static struct ctl_table sysctl_base_table[] = {
276         {
277                 .procname       = "kernel",
278                 .mode           = 0555,
279                 .child          = kern_table,
280         },
281         {
282                 .procname       = "vm",
283                 .mode           = 0555,
284                 .child          = vm_table,
285         },
286         {
287                 .procname       = "fs",
288                 .mode           = 0555,
289                 .child          = fs_table,
290         },
291         {
292                 .procname       = "debug",
293                 .mode           = 0555,
294                 .child          = debug_table,
295         },
296         {
297                 .procname       = "dev",
298                 .mode           = 0555,
299                 .child          = dev_table,
300         },
301         { }
302 };
303
#if defined(CONFIG_SCHED_DEBUG)
/* Bounds referenced by the scheduler entries of kern_table */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
#if defined(CONFIG_SMP)
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

#if defined(CONFIG_COMPACTION)
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif
319
320 static struct ctl_table kern_table[] = {
321         {
322                 .procname       = "sched_child_runs_first",
323                 .data           = &sysctl_sched_child_runs_first,
324                 .maxlen         = sizeof(unsigned int),
325                 .mode           = 0644,
326                 .proc_handler   = proc_dointvec,
327         },
328 #ifdef CONFIG_SCHED_DEBUG
329         {
330                 .procname       = "sched_min_granularity_ns",
331                 .data           = &sysctl_sched_min_granularity,
332                 .maxlen         = sizeof(unsigned int),
333                 .mode           = 0644,
334                 .proc_handler   = sched_proc_update_handler,
335                 .extra1         = &min_sched_granularity_ns,
336                 .extra2         = &max_sched_granularity_ns,
337         },
338         {
339                 .procname       = "sched_latency_ns",
340                 .data           = &sysctl_sched_latency,
341                 .maxlen         = sizeof(unsigned int),
342                 .mode           = 0644,
343                 .proc_handler   = sched_proc_update_handler,
344                 .extra1         = &min_sched_granularity_ns,
345                 .extra2         = &max_sched_granularity_ns,
346         },
347         {
348                 .procname       = "sched_wakeup_granularity_ns",
349                 .data           = &sysctl_sched_wakeup_granularity,
350                 .maxlen         = sizeof(unsigned int),
351                 .mode           = 0644,
352                 .proc_handler   = sched_proc_update_handler,
353                 .extra1         = &min_wakeup_granularity_ns,
354                 .extra2         = &max_wakeup_granularity_ns,
355         },
356 #ifdef CONFIG_SMP
357         {
358                 .procname       = "sched_tunable_scaling",
359                 .data           = &sysctl_sched_tunable_scaling,
360                 .maxlen         = sizeof(enum sched_tunable_scaling),
361                 .mode           = 0644,
362                 .proc_handler   = sched_proc_update_handler,
363                 .extra1         = &min_sched_tunable_scaling,
364                 .extra2         = &max_sched_tunable_scaling,
365         },
366         {
367                 .procname       = "sched_migration_cost_ns",
368                 .data           = &sysctl_sched_migration_cost,
369                 .maxlen         = sizeof(unsigned int),
370                 .mode           = 0644,
371                 .proc_handler   = proc_dointvec,
372         },
373         {
374                 .procname       = "sched_nr_migrate",
375                 .data           = &sysctl_sched_nr_migrate,
376                 .maxlen         = sizeof(unsigned int),
377                 .mode           = 0644,
378                 .proc_handler   = proc_dointvec,
379         },
380 #ifdef CONFIG_SCHEDSTATS
381         {
382                 .procname       = "sched_schedstats",
383                 .data           = NULL,
384                 .maxlen         = sizeof(unsigned int),
385                 .mode           = 0644,
386                 .proc_handler   = sysctl_schedstats,
387                 .extra1         = &zero,
388                 .extra2         = &one,
389         },
390 #endif /* CONFIG_SCHEDSTATS */
391 #endif /* CONFIG_SMP */
392 #ifdef CONFIG_NUMA_BALANCING
393         {
394                 .procname       = "numa_balancing_scan_delay_ms",
395                 .data           = &sysctl_numa_balancing_scan_delay,
396                 .maxlen         = sizeof(unsigned int),
397                 .mode           = 0644,
398                 .proc_handler   = proc_dointvec,
399         },
400         {
401                 .procname       = "numa_balancing_scan_period_min_ms",
402                 .data           = &sysctl_numa_balancing_scan_period_min,
403                 .maxlen         = sizeof(unsigned int),
404                 .mode           = 0644,
405                 .proc_handler   = proc_dointvec,
406         },
407         {
408                 .procname       = "numa_balancing_scan_period_max_ms",
409                 .data           = &sysctl_numa_balancing_scan_period_max,
410                 .maxlen         = sizeof(unsigned int),
411                 .mode           = 0644,
412                 .proc_handler   = proc_dointvec,
413         },
414         {
415                 .procname       = "numa_balancing_scan_size_mb",
416                 .data           = &sysctl_numa_balancing_scan_size,
417                 .maxlen         = sizeof(unsigned int),
418                 .mode           = 0644,
419                 .proc_handler   = proc_dointvec_minmax,
420                 .extra1         = &one,
421         },
422         {
423                 .procname       = "numa_balancing",
424                 .data           = NULL, /* filled in by handler */
425                 .maxlen         = sizeof(unsigned int),
426                 .mode           = 0644,
427                 .proc_handler   = sysctl_numa_balancing,
428                 .extra1         = &zero,
429                 .extra2         = &one,
430         },
431 #endif /* CONFIG_NUMA_BALANCING */
432 #endif /* CONFIG_SCHED_DEBUG */
433         {
434                 .procname       = "sched_rt_period_us",
435                 .data           = &sysctl_sched_rt_period,
436                 .maxlen         = sizeof(unsigned int),
437                 .mode           = 0644,
438                 .proc_handler   = sched_rt_handler,
439         },
440         {
441                 .procname       = "sched_rt_runtime_us",
442                 .data           = &sysctl_sched_rt_runtime,
443                 .maxlen         = sizeof(int),
444                 .mode           = 0644,
445                 .proc_handler   = sched_rt_handler,
446         },
447         {
448                 .procname       = "sched_rr_timeslice_ms",
449                 .data           = &sysctl_sched_rr_timeslice,
450                 .maxlen         = sizeof(int),
451                 .mode           = 0644,
452                 .proc_handler   = sched_rr_handler,
453         },
454 #ifdef CONFIG_SCHED_AUTOGROUP
455         {
456                 .procname       = "sched_autogroup_enabled",
457                 .data           = &sysctl_sched_autogroup_enabled,
458                 .maxlen         = sizeof(unsigned int),
459                 .mode           = 0644,
460                 .proc_handler   = proc_dointvec_minmax,
461                 .extra1         = &zero,
462                 .extra2         = &one,
463         },
464 #endif
465 #ifdef CONFIG_CFS_BANDWIDTH
466         {
467                 .procname       = "sched_cfs_bandwidth_slice_us",
468                 .data           = &sysctl_sched_cfs_bandwidth_slice,
469                 .maxlen         = sizeof(unsigned int),
470                 .mode           = 0644,
471                 .proc_handler   = proc_dointvec_minmax,
472                 .extra1         = &one,
473         },
474 #endif
475 #ifdef CONFIG_PROVE_LOCKING
476         {
477                 .procname       = "prove_locking",
478                 .data           = &prove_locking,
479                 .maxlen         = sizeof(int),
480                 .mode           = 0644,
481                 .proc_handler   = proc_dointvec,
482         },
483 #endif
484 #ifdef CONFIG_LOCK_STAT
485         {
486                 .procname       = "lock_stat",
487                 .data           = &lock_stat,
488                 .maxlen         = sizeof(int),
489                 .mode           = 0644,
490                 .proc_handler   = proc_dointvec,
491         },
492 #endif
493         {
494                 .procname       = "panic",
495                 .data           = &panic_timeout,
496                 .maxlen         = sizeof(int),
497                 .mode           = 0644,
498                 .proc_handler   = proc_dointvec,
499         },
500 #ifdef CONFIG_COREDUMP
501         {
502                 .procname       = "core_uses_pid",
503                 .data           = &core_uses_pid,
504                 .maxlen         = sizeof(int),
505                 .mode           = 0644,
506                 .proc_handler   = proc_dointvec,
507         },
508         {
509                 .procname       = "core_pattern",
510                 .data           = core_pattern,
511                 .maxlen         = CORENAME_MAX_SIZE,
512                 .mode           = 0644,
513                 .proc_handler   = proc_dostring_coredump,
514         },
515         {
516                 .procname       = "core_pipe_limit",
517                 .data           = &core_pipe_limit,
518                 .maxlen         = sizeof(unsigned int),
519                 .mode           = 0644,
520                 .proc_handler   = proc_dointvec,
521         },
522 #endif
523 #ifdef CONFIG_PROC_SYSCTL
524         {
525                 .procname       = "tainted",
526                 .maxlen         = sizeof(long),
527                 .mode           = 0644,
528                 .proc_handler   = proc_taint,
529         },
530         {
531                 .procname       = "sysctl_writes_strict",
532                 .data           = &sysctl_writes_strict,
533                 .maxlen         = sizeof(int),
534                 .mode           = 0644,
535                 .proc_handler   = proc_dointvec_minmax,
536                 .extra1         = &neg_one,
537                 .extra2         = &one,
538         },
539 #endif
540 #ifdef CONFIG_LATENCYTOP
541         {
542                 .procname       = "latencytop",
543                 .data           = &latencytop_enabled,
544                 .maxlen         = sizeof(int),
545                 .mode           = 0644,
546                 .proc_handler   = sysctl_latencytop,
547         },
548 #endif
549 #ifdef CONFIG_BLK_DEV_INITRD
550         {
551                 .procname       = "real-root-dev",
552                 .data           = &real_root_dev,
553                 .maxlen         = sizeof(int),
554                 .mode           = 0644,
555                 .proc_handler   = proc_dointvec,
556         },
557 #endif
558         {
559                 .procname       = "print-fatal-signals",
560                 .data           = &print_fatal_signals,
561                 .maxlen         = sizeof(int),
562                 .mode           = 0644,
563                 .proc_handler   = proc_dointvec,
564         },
565 #ifdef CONFIG_SPARC
566         {
567                 .procname       = "reboot-cmd",
568                 .data           = reboot_command,
569                 .maxlen         = 256,
570                 .mode           = 0644,
571                 .proc_handler   = proc_dostring,
572         },
573         {
574                 .procname       = "stop-a",
575                 .data           = &stop_a_enabled,
576                 .maxlen         = sizeof (int),
577                 .mode           = 0644,
578                 .proc_handler   = proc_dointvec,
579         },
580         {
581                 .procname       = "scons-poweroff",
582                 .data           = &scons_pwroff,
583                 .maxlen         = sizeof (int),
584                 .mode           = 0644,
585                 .proc_handler   = proc_dointvec,
586         },
587 #endif
588 #ifdef CONFIG_SPARC64
589         {
590                 .procname       = "tsb-ratio",
591                 .data           = &sysctl_tsb_ratio,
592                 .maxlen         = sizeof (int),
593                 .mode           = 0644,
594                 .proc_handler   = proc_dointvec,
595         },
596 #endif
597 #ifdef __hppa__
598         {
599                 .procname       = "soft-power",
600                 .data           = &pwrsw_enabled,
601                 .maxlen         = sizeof (int),
602                 .mode           = 0644,
603                 .proc_handler   = proc_dointvec,
604         },
605 #endif
606 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
607         {
608                 .procname       = "unaligned-trap",
609                 .data           = &unaligned_enabled,
610                 .maxlen         = sizeof (int),
611                 .mode           = 0644,
612                 .proc_handler   = proc_dointvec,
613         },
614 #endif
615         {
616                 .procname       = "ctrl-alt-del",
617                 .data           = &C_A_D,
618                 .maxlen         = sizeof(int),
619                 .mode           = 0644,
620                 .proc_handler   = proc_dointvec,
621         },
622 #ifdef CONFIG_FUNCTION_TRACER
623         {
624                 .procname       = "ftrace_enabled",
625                 .data           = &ftrace_enabled,
626                 .maxlen         = sizeof(int),
627                 .mode           = 0644,
628                 .proc_handler   = ftrace_enable_sysctl,
629         },
630 #endif
631 #ifdef CONFIG_STACK_TRACER
632         {
633                 .procname       = "stack_tracer_enabled",
634                 .data           = &stack_tracer_enabled,
635                 .maxlen         = sizeof(int),
636                 .mode           = 0644,
637                 .proc_handler   = stack_trace_sysctl,
638         },
639 #endif
640 #ifdef CONFIG_TRACING
641         {
642                 .procname       = "ftrace_dump_on_oops",
643                 .data           = &ftrace_dump_on_oops,
644                 .maxlen         = sizeof(int),
645                 .mode           = 0644,
646                 .proc_handler   = proc_dointvec,
647         },
648         {
649                 .procname       = "traceoff_on_warning",
650                 .data           = &__disable_trace_on_warning,
651                 .maxlen         = sizeof(__disable_trace_on_warning),
652                 .mode           = 0644,
653                 .proc_handler   = proc_dointvec,
654         },
655         {
656                 .procname       = "tracepoint_printk",
657                 .data           = &tracepoint_printk,
658                 .maxlen         = sizeof(tracepoint_printk),
659                 .mode           = 0644,
660                 .proc_handler   = tracepoint_printk_sysctl,
661         },
662 #endif
663 #ifdef CONFIG_KEXEC_CORE
664         {
665                 .procname       = "kexec_load_disabled",
666                 .data           = &kexec_load_disabled,
667                 .maxlen         = sizeof(int),
668                 .mode           = 0644,
669                 /* only handle a transition from default "0" to "1" */
670                 .proc_handler   = proc_dointvec_minmax,
671                 .extra1         = &one,
672                 .extra2         = &one,
673         },
674 #endif
675 #ifdef CONFIG_MODULES
676         {
677                 .procname       = "modprobe",
678                 .data           = &modprobe_path,
679                 .maxlen         = KMOD_PATH_LEN,
680                 .mode           = 0644,
681                 .proc_handler   = proc_dostring,
682         },
683         {
684                 .procname       = "modules_disabled",
685                 .data           = &modules_disabled,
686                 .maxlen         = sizeof(int),
687                 .mode           = 0644,
688                 /* only handle a transition from default "0" to "1" */
689                 .proc_handler   = proc_dointvec_minmax,
690                 .extra1         = &one,
691                 .extra2         = &one,
692         },
693 #endif
694 #ifdef CONFIG_UEVENT_HELPER
695         {
696                 .procname       = "hotplug",
697                 .data           = &uevent_helper,
698                 .maxlen         = UEVENT_HELPER_PATH_LEN,
699                 .mode           = 0644,
700                 .proc_handler   = proc_dostring,
701         },
702 #endif
703 #ifdef CONFIG_CHR_DEV_SG
704         {
705                 .procname       = "sg-big-buff",
706                 .data           = &sg_big_buff,
707                 .maxlen         = sizeof (int),
708                 .mode           = 0444,
709                 .proc_handler   = proc_dointvec,
710         },
711 #endif
712 #ifdef CONFIG_BSD_PROCESS_ACCT
713         {
714                 .procname       = "acct",
715                 .data           = &acct_parm,
716                 .maxlen         = 3*sizeof(int),
717                 .mode           = 0644,
718                 .proc_handler   = proc_dointvec,
719         },
720 #endif
721 #ifdef CONFIG_MAGIC_SYSRQ
722         {
723                 .procname       = "sysrq",
724                 .data           = &__sysrq_enabled,
725                 .maxlen         = sizeof (int),
726                 .mode           = 0644,
727                 .proc_handler   = sysrq_sysctl_handler,
728         },
729 #endif
730 #ifdef CONFIG_PROC_SYSCTL
731         {
732                 .procname       = "cad_pid",
733                 .data           = NULL,
734                 .maxlen         = sizeof (int),
735                 .mode           = 0600,
736                 .proc_handler   = proc_do_cad_pid,
737         },
738 #endif
739         {
740                 .procname       = "threads-max",
741                 .data           = NULL,
742                 .maxlen         = sizeof(int),
743                 .mode           = 0644,
744                 .proc_handler   = sysctl_max_threads,
745         },
746         {
747                 .procname       = "random",
748                 .mode           = 0555,
749                 .child          = random_table,
750         },
751         {
752                 .procname       = "usermodehelper",
753                 .mode           = 0555,
754                 .child          = usermodehelper_table,
755         },
756 #ifdef CONFIG_FW_LOADER_USER_HELPER
757         {
758                 .procname       = "firmware_config",
759                 .mode           = 0555,
760                 .child          = firmware_config_table,
761         },
762 #endif
763         {
764                 .procname       = "overflowuid",
765                 .data           = &overflowuid,
766                 .maxlen         = sizeof(int),
767                 .mode           = 0644,
768                 .proc_handler   = proc_dointvec_minmax,
769                 .extra1         = &minolduid,
770                 .extra2         = &maxolduid,
771         },
772         {
773                 .procname       = "overflowgid",
774                 .data           = &overflowgid,
775                 .maxlen         = sizeof(int),
776                 .mode           = 0644,
777                 .proc_handler   = proc_dointvec_minmax,
778                 .extra1         = &minolduid,
779                 .extra2         = &maxolduid,
780         },
781 #ifdef CONFIG_S390
782 #ifdef CONFIG_MATHEMU
783         {
784                 .procname       = "ieee_emulation_warnings",
785                 .data           = &sysctl_ieee_emulation_warnings,
786                 .maxlen         = sizeof(int),
787                 .mode           = 0644,
788                 .proc_handler   = proc_dointvec,
789         },
790 #endif
791         {
792                 .procname       = "userprocess_debug",
793                 .data           = &show_unhandled_signals,
794                 .maxlen         = sizeof(int),
795                 .mode           = 0644,
796                 .proc_handler   = proc_dointvec,
797         },
798 #endif
799         {
800                 .procname       = "pid_max",
801                 .data           = &pid_max,
802                 .maxlen         = sizeof (int),
803                 .mode           = 0644,
804                 .proc_handler   = proc_dointvec_minmax,
805                 .extra1         = &pid_max_min,
806                 .extra2         = &pid_max_max,
807         },
808         {
809                 .procname       = "panic_on_oops",
810                 .data           = &panic_on_oops,
811                 .maxlen         = sizeof(int),
812                 .mode           = 0644,
813                 .proc_handler   = proc_dointvec,
814         },
815         {
816                 .procname       = "panic_print",
817                 .data           = &panic_print,
818                 .maxlen         = sizeof(unsigned long),
819                 .mode           = 0644,
820                 .proc_handler   = proc_doulongvec_minmax,
821         },
822 #if defined CONFIG_PRINTK
823         {
824                 .procname       = "printk",
825                 .data           = &console_loglevel,
826                 .maxlen         = 4*sizeof(int),
827                 .mode           = 0644,
828                 .proc_handler   = proc_dointvec,
829         },
830         {
831                 .procname       = "printk_ratelimit",
832                 .data           = &printk_ratelimit_state.interval,
833                 .maxlen         = sizeof(int),
834                 .mode           = 0644,
835                 .proc_handler   = proc_dointvec_jiffies,
836         },
837         {
838                 .procname       = "printk_ratelimit_burst",
839                 .data           = &printk_ratelimit_state.burst,
840                 .maxlen         = sizeof(int),
841                 .mode           = 0644,
842                 .proc_handler   = proc_dointvec,
843         },
844         {
845                 .procname       = "printk_delay",
846                 .data           = &printk_delay_msec,
847                 .maxlen         = sizeof(int),
848                 .mode           = 0644,
849                 .proc_handler   = proc_dointvec_minmax,
850                 .extra1         = &zero,
851                 .extra2         = &ten_thousand,
852         },
853         {
854                 .procname       = "printk_devkmsg",
855                 .data           = devkmsg_log_str,
856                 .maxlen         = DEVKMSG_STR_MAX_SIZE,
857                 .mode           = 0644,
858                 .proc_handler   = devkmsg_sysctl_set_loglvl,
859         },
860         {
861                 .procname       = "dmesg_restrict",
862                 .data           = &dmesg_restrict,
863                 .maxlen         = sizeof(int),
864                 .mode           = 0644,
865                 .proc_handler   = proc_dointvec_minmax_sysadmin,
866                 .extra1         = &zero,
867                 .extra2         = &one,
868         },
869         {
870                 .procname       = "kptr_restrict",
871                 .data           = &kptr_restrict,
872                 .maxlen         = sizeof(int),
873                 .mode           = 0644,
874                 .proc_handler   = proc_dointvec_minmax_sysadmin,
875                 .extra1         = &zero,
876                 .extra2         = &two,
877         },
878 #endif
879         {
880                 .procname       = "ngroups_max",
881                 .data           = &ngroups_max,
882                 .maxlen         = sizeof (int),
883                 .mode           = 0444,
884                 .proc_handler   = proc_dointvec,
885         },
886         {
887                 .procname       = "cap_last_cap",
888                 .data           = (void *)&cap_last_cap,
889                 .maxlen         = sizeof(int),
890                 .mode           = 0444,
891                 .proc_handler   = proc_dointvec,
892         },
893 #if defined(CONFIG_LOCKUP_DETECTOR)
894         {
895                 .procname       = "watchdog",
896                 .data           = &watchdog_user_enabled,
897                 .maxlen         = sizeof(int),
898                 .mode           = 0644,
899                 .proc_handler   = proc_watchdog,
900                 .extra1         = &zero,
901                 .extra2         = &one,
902         },
903         {
904                 .procname       = "watchdog_thresh",
905                 .data           = &watchdog_thresh,
906                 .maxlen         = sizeof(int),
907                 .mode           = 0644,
908                 .proc_handler   = proc_watchdog_thresh,
909                 .extra1         = &zero,
910                 .extra2         = &sixty,
911         },
912         {
913                 .procname       = "nmi_watchdog",
914                 .data           = &nmi_watchdog_user_enabled,
915                 .maxlen         = sizeof(int),
916                 .mode           = NMI_WATCHDOG_SYSCTL_PERM,
917                 .proc_handler   = proc_nmi_watchdog,
918                 .extra1         = &zero,
919                 .extra2         = &one,
920         },
921         {
922                 .procname       = "watchdog_cpumask",
923                 .data           = &watchdog_cpumask_bits,
924                 .maxlen         = NR_CPUS,
925                 .mode           = 0644,
926                 .proc_handler   = proc_watchdog_cpumask,
927         },
928 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
929         {
930                 .procname       = "soft_watchdog",
931                 .data           = &soft_watchdog_user_enabled,
932                 .maxlen         = sizeof(int),
933                 .mode           = 0644,
934                 .proc_handler   = proc_soft_watchdog,
935                 .extra1         = &zero,
936                 .extra2         = &one,
937         },
938         {
939                 .procname       = "softlockup_panic",
940                 .data           = &softlockup_panic,
941                 .maxlen         = sizeof(int),
942                 .mode           = 0644,
943                 .proc_handler   = proc_dointvec_minmax,
944                 .extra1         = &zero,
945                 .extra2         = &one,
946         },
947 #ifdef CONFIG_SMP
948         {
949                 .procname       = "softlockup_all_cpu_backtrace",
950                 .data           = &sysctl_softlockup_all_cpu_backtrace,
951                 .maxlen         = sizeof(int),
952                 .mode           = 0644,
953                 .proc_handler   = proc_dointvec_minmax,
954                 .extra1         = &zero,
955                 .extra2         = &one,
956         },
957 #endif /* CONFIG_SMP */
958 #endif
959 #ifdef CONFIG_HARDLOCKUP_DETECTOR
960         {
961                 .procname       = "hardlockup_panic",
962                 .data           = &hardlockup_panic,
963                 .maxlen         = sizeof(int),
964                 .mode           = 0644,
965                 .proc_handler   = proc_dointvec_minmax,
966                 .extra1         = &zero,
967                 .extra2         = &one,
968         },
969 #ifdef CONFIG_SMP
970         {
971                 .procname       = "hardlockup_all_cpu_backtrace",
972                 .data           = &sysctl_hardlockup_all_cpu_backtrace,
973                 .maxlen         = sizeof(int),
974                 .mode           = 0644,
975                 .proc_handler   = proc_dointvec_minmax,
976                 .extra1         = &zero,
977                 .extra2         = &one,
978         },
979 #endif /* CONFIG_SMP */
980 #endif
981 #endif
982
983 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
984         {
985                 .procname       = "unknown_nmi_panic",
986                 .data           = &unknown_nmi_panic,
987                 .maxlen         = sizeof (int),
988                 .mode           = 0644,
989                 .proc_handler   = proc_dointvec,
990         },
991 #endif
992 #if defined(CONFIG_X86)
993         {
994                 .procname       = "panic_on_unrecovered_nmi",
995                 .data           = &panic_on_unrecovered_nmi,
996                 .maxlen         = sizeof(int),
997                 .mode           = 0644,
998                 .proc_handler   = proc_dointvec,
999         },
1000         {
1001                 .procname       = "panic_on_io_nmi",
1002                 .data           = &panic_on_io_nmi,
1003                 .maxlen         = sizeof(int),
1004                 .mode           = 0644,
1005                 .proc_handler   = proc_dointvec,
1006         },
1007 #ifdef CONFIG_DEBUG_STACKOVERFLOW
1008         {
1009                 .procname       = "panic_on_stackoverflow",
1010                 .data           = &sysctl_panic_on_stackoverflow,
1011                 .maxlen         = sizeof(int),
1012                 .mode           = 0644,
1013                 .proc_handler   = proc_dointvec,
1014         },
1015 #endif
1016         {
1017                 .procname       = "bootloader_type",
1018                 .data           = &bootloader_type,
1019                 .maxlen         = sizeof (int),
1020                 .mode           = 0444,
1021                 .proc_handler   = proc_dointvec,
1022         },
1023         {
1024                 .procname       = "bootloader_version",
1025                 .data           = &bootloader_version,
1026                 .maxlen         = sizeof (int),
1027                 .mode           = 0444,
1028                 .proc_handler   = proc_dointvec,
1029         },
1030         {
1031                 .procname       = "io_delay_type",
1032                 .data           = &io_delay_type,
1033                 .maxlen         = sizeof(int),
1034                 .mode           = 0644,
1035                 .proc_handler   = proc_dointvec,
1036         },
1037 #endif
1038 #if defined(CONFIG_MMU)
1039         {
1040                 .procname       = "randomize_va_space",
1041                 .data           = &randomize_va_space,
1042                 .maxlen         = sizeof(int),
1043                 .mode           = 0644,
1044                 .proc_handler   = proc_dointvec,
1045         },
1046 #endif
1047 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1048         {
1049                 .procname       = "spin_retry",
1050                 .data           = &spin_retry,
1051                 .maxlen         = sizeof (int),
1052                 .mode           = 0644,
1053                 .proc_handler   = proc_dointvec,
1054         },
1055 #endif
1056 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1057         {
1058                 .procname       = "acpi_video_flags",
1059                 .data           = &acpi_realmode_flags,
1060                 .maxlen         = sizeof (unsigned long),
1061                 .mode           = 0644,
1062                 .proc_handler   = proc_doulongvec_minmax,
1063         },
1064 #endif
1065 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1066         {
1067                 .procname       = "ignore-unaligned-usertrap",
1068                 .data           = &no_unaligned_warning,
1069                 .maxlen         = sizeof (int),
1070                 .mode           = 0644,
1071                 .proc_handler   = proc_dointvec,
1072         },
1073 #endif
1074 #ifdef CONFIG_IA64
1075         {
1076                 .procname       = "unaligned-dump-stack",
1077                 .data           = &unaligned_dump_stack,
1078                 .maxlen         = sizeof (int),
1079                 .mode           = 0644,
1080                 .proc_handler   = proc_dointvec,
1081         },
1082 #endif
1083 #ifdef CONFIG_DETECT_HUNG_TASK
1084         {
1085                 .procname       = "hung_task_panic",
1086                 .data           = &sysctl_hung_task_panic,
1087                 .maxlen         = sizeof(int),
1088                 .mode           = 0644,
1089                 .proc_handler   = proc_dointvec_minmax,
1090                 .extra1         = &zero,
1091                 .extra2         = &one,
1092         },
1093         {
1094                 .procname       = "hung_task_check_count",
1095                 .data           = &sysctl_hung_task_check_count,
1096                 .maxlen         = sizeof(int),
1097                 .mode           = 0644,
1098                 .proc_handler   = proc_dointvec_minmax,
1099                 .extra1         = &zero,
1100         },
1101         {
1102                 .procname       = "hung_task_timeout_secs",
1103                 .data           = &sysctl_hung_task_timeout_secs,
1104                 .maxlen         = sizeof(unsigned long),
1105                 .mode           = 0644,
1106                 .proc_handler   = proc_dohung_task_timeout_secs,
1107                 .extra2         = &hung_task_timeout_max,
1108         },
1109         {
1110                 .procname       = "hung_task_check_interval_secs",
1111                 .data           = &sysctl_hung_task_check_interval_secs,
1112                 .maxlen         = sizeof(unsigned long),
1113                 .mode           = 0644,
1114                 .proc_handler   = proc_dohung_task_timeout_secs,
1115                 .extra2         = &hung_task_timeout_max,
1116         },
1117         {
1118                 .procname       = "hung_task_warnings",
1119                 .data           = &sysctl_hung_task_warnings,
1120                 .maxlen         = sizeof(int),
1121                 .mode           = 0644,
1122                 .proc_handler   = proc_dointvec_minmax,
1123                 .extra1         = &neg_one,
1124         },
1125 #endif
1126 #ifdef CONFIG_RT_MUTEXES
1127         {
1128                 .procname       = "max_lock_depth",
1129                 .data           = &max_lock_depth,
1130                 .maxlen         = sizeof(int),
1131                 .mode           = 0644,
1132                 .proc_handler   = proc_dointvec,
1133         },
1134 #endif
1135         {
1136                 .procname       = "poweroff_cmd",
1137                 .data           = &poweroff_cmd,
1138                 .maxlen         = POWEROFF_CMD_PATH_LEN,
1139                 .mode           = 0644,
1140                 .proc_handler   = proc_dostring,
1141         },
1142 #ifdef CONFIG_KEYS
1143         {
1144                 .procname       = "keys",
1145                 .mode           = 0555,
1146                 .child          = key_sysctls,
1147         },
1148 #endif
1149 #ifdef CONFIG_PERF_EVENTS
1150         /*
1151          * User-space scripts rely on the existence of this file
1152          * as a feature check for perf_events being enabled.
1153          *
1154          * So it's an ABI, do not remove!
1155          */
1156         {
1157                 .procname       = "perf_event_paranoid",
1158                 .data           = &sysctl_perf_event_paranoid,
1159                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
1160                 .mode           = 0644,
1161                 .proc_handler   = proc_dointvec,
1162         },
1163         {
1164                 .procname       = "perf_event_mlock_kb",
1165                 .data           = &sysctl_perf_event_mlock,
1166                 .maxlen         = sizeof(sysctl_perf_event_mlock),
1167                 .mode           = 0644,
1168                 .proc_handler   = proc_dointvec,
1169         },
1170         {
1171                 .procname       = "perf_event_max_sample_rate",
1172                 .data           = &sysctl_perf_event_sample_rate,
1173                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1174                 .mode           = 0644,
1175                 .proc_handler   = perf_proc_update_handler,
1176                 .extra1         = &one,
1177         },
1178         {
1179                 .procname       = "perf_cpu_time_max_percent",
1180                 .data           = &sysctl_perf_cpu_time_max_percent,
1181                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1182                 .mode           = 0644,
1183                 .proc_handler   = perf_cpu_time_max_percent_handler,
1184                 .extra1         = &zero,
1185                 .extra2         = &one_hundred,
1186         },
1187         {
1188                 .procname       = "perf_event_max_stack",
1189                 .data           = &sysctl_perf_event_max_stack,
1190                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
1191                 .mode           = 0644,
1192                 .proc_handler   = perf_event_max_stack_handler,
1193                 .extra1         = &zero,
1194                 .extra2         = &six_hundred_forty_kb,
1195         },
1196         {
1197                 .procname       = "perf_event_max_contexts_per_stack",
1198                 .data           = &sysctl_perf_event_max_contexts_per_stack,
1199                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1200                 .mode           = 0644,
1201                 .proc_handler   = perf_event_max_stack_handler,
1202                 .extra1         = &zero,
1203                 .extra2         = &one_thousand,
1204         },
1205 #endif
1206         {
1207                 .procname       = "panic_on_warn",
1208                 .data           = &panic_on_warn,
1209                 .maxlen         = sizeof(int),
1210                 .mode           = 0644,
1211                 .proc_handler   = proc_dointvec_minmax,
1212                 .extra1         = &zero,
1213                 .extra2         = &one,
1214         },
1215 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1216         {
1217                 .procname       = "timer_migration",
1218                 .data           = &sysctl_timer_migration,
1219                 .maxlen         = sizeof(unsigned int),
1220                 .mode           = 0644,
1221                 .proc_handler   = timer_migration_handler,
1222                 .extra1         = &zero,
1223                 .extra2         = &one,
1224         },
1225 #endif
1226 #ifdef CONFIG_BPF_SYSCALL
1227         {
1228                 .procname       = "unprivileged_bpf_disabled",
1229                 .data           = &sysctl_unprivileged_bpf_disabled,
1230                 .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1231                 .mode           = 0644,
1232                 /* only handle a transition from default "0" to "1" */
1233                 .proc_handler   = proc_dointvec_minmax,
1234                 .extra1         = &one,
1235                 .extra2         = &one,
1236         },
1237         {
1238                 .procname       = "bpf_stats_enabled",
1239                 .data           = &sysctl_bpf_stats_enabled,
1240                 .maxlen         = sizeof(sysctl_bpf_stats_enabled),
1241                 .mode           = 0644,
1242                 .proc_handler   = proc_dointvec_minmax_bpf_stats,
1243                 .extra1         = &zero,
1244                 .extra2         = &one,
1245         },
1246 #endif
1247 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1248         {
1249                 .procname       = "panic_on_rcu_stall",
1250                 .data           = &sysctl_panic_on_rcu_stall,
1251                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1252                 .mode           = 0644,
1253                 .proc_handler   = proc_dointvec_minmax,
1254                 .extra1         = &zero,
1255                 .extra2         = &one,
1256         },
1257 #endif
1258 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1259         {
1260                 .procname       = "stack_erasing",
1261                 .data           = NULL,
1262                 .maxlen         = sizeof(int),
1263                 .mode           = 0600,
1264                 .proc_handler   = stack_erasing_sysctl,
1265                 .extra1         = &zero,
1266                 .extra2         = &one,
1267         },
1268 #endif
1269         { }
1270 };
1271
1272 static struct ctl_table vm_table[] = {
1273         {
1274                 .procname       = "overcommit_memory",
1275                 .data           = &sysctl_overcommit_memory,
1276                 .maxlen         = sizeof(sysctl_overcommit_memory),
1277                 .mode           = 0644,
1278                 .proc_handler   = proc_dointvec_minmax,
1279                 .extra1         = &zero,
1280                 .extra2         = &two,
1281         },
1282         {
1283                 .procname       = "panic_on_oom",
1284                 .data           = &sysctl_panic_on_oom,
1285                 .maxlen         = sizeof(sysctl_panic_on_oom),
1286                 .mode           = 0644,
1287                 .proc_handler   = proc_dointvec_minmax,
1288                 .extra1         = &zero,
1289                 .extra2         = &two,
1290         },
1291         {
1292                 .procname       = "oom_kill_allocating_task",
1293                 .data           = &sysctl_oom_kill_allocating_task,
1294                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1295                 .mode           = 0644,
1296                 .proc_handler   = proc_dointvec,
1297         },
1298         {
1299                 .procname       = "oom_dump_tasks",
1300                 .data           = &sysctl_oom_dump_tasks,
1301                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1302                 .mode           = 0644,
1303                 .proc_handler   = proc_dointvec,
1304         },
1305         {
1306                 .procname       = "overcommit_ratio",
1307                 .data           = &sysctl_overcommit_ratio,
1308                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1309                 .mode           = 0644,
1310                 .proc_handler   = overcommit_ratio_handler,
1311         },
1312         {
1313                 .procname       = "overcommit_kbytes",
1314                 .data           = &sysctl_overcommit_kbytes,
1315                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
1316                 .mode           = 0644,
1317                 .proc_handler   = overcommit_kbytes_handler,
1318         },
1319         {
1320                 .procname       = "page-cluster", 
1321                 .data           = &page_cluster,
1322                 .maxlen         = sizeof(int),
1323                 .mode           = 0644,
1324                 .proc_handler   = proc_dointvec_minmax,
1325                 .extra1         = &zero,
1326         },
1327         {
1328                 .procname       = "dirty_background_ratio",
1329                 .data           = &dirty_background_ratio,
1330                 .maxlen         = sizeof(dirty_background_ratio),
1331                 .mode           = 0644,
1332                 .proc_handler   = dirty_background_ratio_handler,
1333                 .extra1         = &zero,
1334                 .extra2         = &one_hundred,
1335         },
1336         {
1337                 .procname       = "dirty_background_bytes",
1338                 .data           = &dirty_background_bytes,
1339                 .maxlen         = sizeof(dirty_background_bytes),
1340                 .mode           = 0644,
1341                 .proc_handler   = dirty_background_bytes_handler,
1342                 .extra1         = &one_ul,
1343         },
1344         {
1345                 .procname       = "dirty_ratio",
1346                 .data           = &vm_dirty_ratio,
1347                 .maxlen         = sizeof(vm_dirty_ratio),
1348                 .mode           = 0644,
1349                 .proc_handler   = dirty_ratio_handler,
1350                 .extra1         = &zero,
1351                 .extra2         = &one_hundred,
1352         },
1353         {
1354                 .procname       = "dirty_bytes",
1355                 .data           = &vm_dirty_bytes,
1356                 .maxlen         = sizeof(vm_dirty_bytes),
1357                 .mode           = 0644,
1358                 .proc_handler   = dirty_bytes_handler,
1359                 .extra1         = &dirty_bytes_min,
1360         },
1361         {
1362                 .procname       = "dirty_writeback_centisecs",
1363                 .data           = &dirty_writeback_interval,
1364                 .maxlen         = sizeof(dirty_writeback_interval),
1365                 .mode           = 0644,
1366                 .proc_handler   = dirty_writeback_centisecs_handler,
1367         },
1368         {
1369                 .procname       = "dirty_expire_centisecs",
1370                 .data           = &dirty_expire_interval,
1371                 .maxlen         = sizeof(dirty_expire_interval),
1372                 .mode           = 0644,
1373                 .proc_handler   = proc_dointvec_minmax,
1374                 .extra1         = &zero,
1375         },
1376         {
1377                 .procname       = "dirtytime_expire_seconds",
1378                 .data           = &dirtytime_expire_interval,
1379                 .maxlen         = sizeof(dirtytime_expire_interval),
1380                 .mode           = 0644,
1381                 .proc_handler   = dirtytime_interval_handler,
1382                 .extra1         = &zero,
1383         },
1384         {
1385                 .procname       = "swappiness",
1386                 .data           = &vm_swappiness,
1387                 .maxlen         = sizeof(vm_swappiness),
1388                 .mode           = 0644,
1389                 .proc_handler   = proc_dointvec_minmax,
1390                 .extra1         = &zero,
1391                 .extra2         = &one_hundred,
1392         },
1393 #ifdef CONFIG_HUGETLB_PAGE
1394         {
1395                 .procname       = "nr_hugepages",
1396                 .data           = NULL,
1397                 .maxlen         = sizeof(unsigned long),
1398                 .mode           = 0644,
1399                 .proc_handler   = hugetlb_sysctl_handler,
1400         },
1401 #ifdef CONFIG_NUMA
1402         {
1403                 .procname       = "nr_hugepages_mempolicy",
1404                 .data           = NULL,
1405                 .maxlen         = sizeof(unsigned long),
1406                 .mode           = 0644,
1407                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1408         },
1409         {
1410                 .procname               = "numa_stat",
1411                 .data                   = &sysctl_vm_numa_stat,
1412                 .maxlen                 = sizeof(int),
1413                 .mode                   = 0644,
1414                 .proc_handler   = sysctl_vm_numa_stat_handler,
1415                 .extra1                 = &zero,
1416                 .extra2                 = &one,
1417         },
1418 #endif
1419          {
1420                 .procname       = "hugetlb_shm_group",
1421                 .data           = &sysctl_hugetlb_shm_group,
1422                 .maxlen         = sizeof(gid_t),
1423                 .mode           = 0644,
1424                 .proc_handler   = proc_dointvec,
1425          },
1426         {
1427                 .procname       = "nr_overcommit_hugepages",
1428                 .data           = NULL,
1429                 .maxlen         = sizeof(unsigned long),
1430                 .mode           = 0644,
1431                 .proc_handler   = hugetlb_overcommit_handler,
1432         },
1433 #endif
1434         {
1435                 .procname       = "lowmem_reserve_ratio",
1436                 .data           = &sysctl_lowmem_reserve_ratio,
1437                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1438                 .mode           = 0644,
1439                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1440         },
1441         {
1442                 .procname       = "drop_caches",
1443                 .data           = &sysctl_drop_caches,
1444                 .maxlen         = sizeof(int),
1445                 .mode           = 0644,
1446                 .proc_handler   = drop_caches_sysctl_handler,
1447                 .extra1         = &one,
1448                 .extra2         = &four,
1449         },
1450 #ifdef CONFIG_COMPACTION
1451         {
1452                 .procname       = "compact_memory",
1453                 .data           = &sysctl_compact_memory,
1454                 .maxlen         = sizeof(int),
1455                 .mode           = 0200,
1456                 .proc_handler   = sysctl_compaction_handler,
1457         },
1458         {
1459                 .procname       = "extfrag_threshold",
1460                 .data           = &sysctl_extfrag_threshold,
1461                 .maxlen         = sizeof(int),
1462                 .mode           = 0644,
1463                 .proc_handler   = proc_dointvec_minmax,
1464                 .extra1         = &min_extfrag_threshold,
1465                 .extra2         = &max_extfrag_threshold,
1466         },
1467         {
1468                 .procname       = "compact_unevictable_allowed",
1469                 .data           = &sysctl_compact_unevictable_allowed,
1470                 .maxlen         = sizeof(int),
1471                 .mode           = 0644,
1472                 .proc_handler   = proc_dointvec,
1473                 .extra1         = &zero,
1474                 .extra2         = &one,
1475         },
1476
1477 #endif /* CONFIG_COMPACTION */
1478         {
1479                 .procname       = "min_free_kbytes",
1480                 .data           = &min_free_kbytes,
1481                 .maxlen         = sizeof(min_free_kbytes),
1482                 .mode           = 0644,
1483                 .proc_handler   = min_free_kbytes_sysctl_handler,
1484                 .extra1         = &zero,
1485         },
1486         {
1487                 .procname       = "watermark_boost_factor",
1488                 .data           = &watermark_boost_factor,
1489                 .maxlen         = sizeof(watermark_boost_factor),
1490                 .mode           = 0644,
1491                 .proc_handler   = watermark_boost_factor_sysctl_handler,
1492                 .extra1         = &zero,
1493         },
1494         {
1495                 .procname       = "watermark_scale_factor",
1496                 .data           = &watermark_scale_factor,
1497                 .maxlen         = sizeof(watermark_scale_factor),
1498                 .mode           = 0644,
1499                 .proc_handler   = watermark_scale_factor_sysctl_handler,
1500                 .extra1         = &one,
1501                 .extra2         = &one_thousand,
1502         },
1503         {
1504                 .procname       = "percpu_pagelist_fraction",
1505                 .data           = &percpu_pagelist_fraction,
1506                 .maxlen         = sizeof(percpu_pagelist_fraction),
1507                 .mode           = 0644,
1508                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1509                 .extra1         = &zero,
1510         },
1511 #ifdef CONFIG_MMU
1512         {
1513                 .procname       = "max_map_count",
1514                 .data           = &sysctl_max_map_count,
1515                 .maxlen         = sizeof(sysctl_max_map_count),
1516                 .mode           = 0644,
1517                 .proc_handler   = proc_dointvec_minmax,
1518                 .extra1         = &zero,
1519         },
1520 #else
1521         {
1522                 .procname       = "nr_trim_pages",
1523                 .data           = &sysctl_nr_trim_pages,
1524                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1525                 .mode           = 0644,
1526                 .proc_handler   = proc_dointvec_minmax,
1527                 .extra1         = &zero,
1528         },
1529 #endif
1530         {
1531                 .procname       = "laptop_mode",
1532                 .data           = &laptop_mode,
1533                 .maxlen         = sizeof(laptop_mode),
1534                 .mode           = 0644,
1535                 .proc_handler   = proc_dointvec_jiffies,
1536         },
1537         {
1538                 .procname       = "block_dump",
1539                 .data           = &block_dump,
1540                 .maxlen         = sizeof(block_dump),
1541                 .mode           = 0644,
1542                 .proc_handler   = proc_dointvec,
1543                 .extra1         = &zero,
1544         },
1545         {
1546                 .procname       = "vfs_cache_pressure",
1547                 .data           = &sysctl_vfs_cache_pressure,
1548                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1549                 .mode           = 0644,
1550                 .proc_handler   = proc_dointvec,
1551                 .extra1         = &zero,
1552         },
1553 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1554         {
1555                 .procname       = "legacy_va_layout",
1556                 .data           = &sysctl_legacy_va_layout,
1557                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1558                 .mode           = 0644,
1559                 .proc_handler   = proc_dointvec,
1560                 .extra1         = &zero,
1561         },
1562 #endif
1563 #ifdef CONFIG_NUMA
1564         {
1565                 .procname       = "zone_reclaim_mode",
1566                 .data           = &node_reclaim_mode,
1567                 .maxlen         = sizeof(node_reclaim_mode),
1568                 .mode           = 0644,
1569                 .proc_handler   = proc_dointvec,
1570                 .extra1         = &zero,
1571         },
1572         {
1573                 .procname       = "min_unmapped_ratio",
1574                 .data           = &sysctl_min_unmapped_ratio,
1575                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1576                 .mode           = 0644,
1577                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1578                 .extra1         = &zero,
1579                 .extra2         = &one_hundred,
1580         },
1581         {
1582                 .procname       = "min_slab_ratio",
1583                 .data           = &sysctl_min_slab_ratio,
1584                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1585                 .mode           = 0644,
1586                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1587                 .extra1         = &zero,
1588                 .extra2         = &one_hundred,
1589         },
1590 #endif
1591 #ifdef CONFIG_SMP
1592         {
1593                 .procname       = "stat_interval",
1594                 .data           = &sysctl_stat_interval,
1595                 .maxlen         = sizeof(sysctl_stat_interval),
1596                 .mode           = 0644,
1597                 .proc_handler   = proc_dointvec_jiffies,
1598         },
1599         {
1600                 .procname       = "stat_refresh",
1601                 .data           = NULL,
1602                 .maxlen         = 0,
1603                 .mode           = 0600,
1604                 .proc_handler   = vmstat_refresh,
1605         },
1606 #endif
1607 #ifdef CONFIG_MMU
1608         {
1609                 .procname       = "mmap_min_addr",
1610                 .data           = &dac_mmap_min_addr,
1611                 .maxlen         = sizeof(unsigned long),
1612                 .mode           = 0644,
1613                 .proc_handler   = mmap_min_addr_handler,
1614         },
1615 #endif
1616 #ifdef CONFIG_NUMA
1617         {
1618                 .procname       = "numa_zonelist_order",
1619                 .data           = &numa_zonelist_order,
1620                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1621                 .mode           = 0644,
1622                 .proc_handler   = numa_zonelist_order_handler,
1623         },
1624 #endif
1625 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1626    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1627         {
1628                 .procname       = "vdso_enabled",
1629 #ifdef CONFIG_X86_32
1630                 .data           = &vdso32_enabled,
1631                 .maxlen         = sizeof(vdso32_enabled),
1632 #else
1633                 .data           = &vdso_enabled,
1634                 .maxlen         = sizeof(vdso_enabled),
1635 #endif
1636                 .mode           = 0644,
1637                 .proc_handler   = proc_dointvec,
1638                 .extra1         = &zero,
1639         },
1640 #endif
1641 #ifdef CONFIG_HIGHMEM
1642         {
1643                 .procname       = "highmem_is_dirtyable",
1644                 .data           = &vm_highmem_is_dirtyable,
1645                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1646                 .mode           = 0644,
1647                 .proc_handler   = proc_dointvec_minmax,
1648                 .extra1         = &zero,
1649                 .extra2         = &one,
1650         },
1651 #endif
1652 #ifdef CONFIG_MEMORY_FAILURE
1653         {
1654                 .procname       = "memory_failure_early_kill",
1655                 .data           = &sysctl_memory_failure_early_kill,
1656                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1657                 .mode           = 0644,
1658                 .proc_handler   = proc_dointvec_minmax,
1659                 .extra1         = &zero,
1660                 .extra2         = &one,
1661         },
1662         {
1663                 .procname       = "memory_failure_recovery",
1664                 .data           = &sysctl_memory_failure_recovery,
1665                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
1666                 .mode           = 0644,
1667                 .proc_handler   = proc_dointvec_minmax,
1668                 .extra1         = &zero,
1669                 .extra2         = &one,
1670         },
1671 #endif
1672         {
1673                 .procname       = "user_reserve_kbytes",
1674                 .data           = &sysctl_user_reserve_kbytes,
1675                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1676                 .mode           = 0644,
1677                 .proc_handler   = proc_doulongvec_minmax,
1678         },
1679         {
1680                 .procname       = "admin_reserve_kbytes",
1681                 .data           = &sysctl_admin_reserve_kbytes,
1682                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1683                 .mode           = 0644,
1684                 .proc_handler   = proc_doulongvec_minmax,
1685         },
1686 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1687         {
1688                 .procname       = "mmap_rnd_bits",
1689                 .data           = &mmap_rnd_bits,
1690                 .maxlen         = sizeof(mmap_rnd_bits),
1691                 .mode           = 0600,
1692                 .proc_handler   = proc_dointvec_minmax,
1693                 .extra1         = (void *)&mmap_rnd_bits_min,
1694                 .extra2         = (void *)&mmap_rnd_bits_max,
1695         },
1696 #endif
1697 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1698         {
1699                 .procname       = "mmap_rnd_compat_bits",
1700                 .data           = &mmap_rnd_compat_bits,
1701                 .maxlen         = sizeof(mmap_rnd_compat_bits),
1702                 .mode           = 0600,
1703                 .proc_handler   = proc_dointvec_minmax,
1704                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
1705                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
1706         },
1707 #endif
1708         { }
1709 };
1710
1711 static struct ctl_table fs_table[] = {
1712         {
1713                 .procname       = "inode-nr",
1714                 .data           = &inodes_stat,
1715                 .maxlen         = 2*sizeof(long),
1716                 .mode           = 0444,
1717                 .proc_handler   = proc_nr_inodes,
1718         },
1719         {
1720                 .procname       = "inode-state",
1721                 .data           = &inodes_stat,
1722                 .maxlen         = 7*sizeof(long),
1723                 .mode           = 0444,
1724                 .proc_handler   = proc_nr_inodes,
1725         },
1726         {
1727                 .procname       = "file-nr",
1728                 .data           = &files_stat,
1729                 .maxlen         = sizeof(files_stat),
1730                 .mode           = 0444,
1731                 .proc_handler   = proc_nr_files,
1732         },
1733         {
1734                 .procname       = "file-max",
1735                 .data           = &files_stat.max_files,
1736                 .maxlen         = sizeof(files_stat.max_files),
1737                 .mode           = 0644,
1738                 .proc_handler   = proc_doulongvec_minmax,
1739         },
1740         {
1741                 .procname       = "nr_open",
1742                 .data           = &sysctl_nr_open,
1743                 .maxlen         = sizeof(unsigned int),
1744                 .mode           = 0644,
1745                 .proc_handler   = proc_dointvec_minmax,
1746                 .extra1         = &sysctl_nr_open_min,
1747                 .extra2         = &sysctl_nr_open_max,
1748         },
1749         {
1750                 .procname       = "dentry-state",
1751                 .data           = &dentry_stat,
1752                 .maxlen         = 6*sizeof(long),
1753                 .mode           = 0444,
1754                 .proc_handler   = proc_nr_dentry,
1755         },
1756         {
1757                 .procname       = "overflowuid",
1758                 .data           = &fs_overflowuid,
1759                 .maxlen         = sizeof(int),
1760                 .mode           = 0644,
1761                 .proc_handler   = proc_dointvec_minmax,
1762                 .extra1         = &minolduid,
1763                 .extra2         = &maxolduid,
1764         },
1765         {
1766                 .procname       = "overflowgid",
1767                 .data           = &fs_overflowgid,
1768                 .maxlen         = sizeof(int),
1769                 .mode           = 0644,
1770                 .proc_handler   = proc_dointvec_minmax,
1771                 .extra1         = &minolduid,
1772                 .extra2         = &maxolduid,
1773         },
1774 #ifdef CONFIG_FILE_LOCKING
1775         {
1776                 .procname       = "leases-enable",
1777                 .data           = &leases_enable,
1778                 .maxlen         = sizeof(int),
1779                 .mode           = 0644,
1780                 .proc_handler   = proc_dointvec,
1781         },
1782 #endif
1783 #ifdef CONFIG_DNOTIFY
1784         {
1785                 .procname       = "dir-notify-enable",
1786                 .data           = &dir_notify_enable,
1787                 .maxlen         = sizeof(int),
1788                 .mode           = 0644,
1789                 .proc_handler   = proc_dointvec,
1790         },
1791 #endif
1792 #ifdef CONFIG_MMU
1793 #ifdef CONFIG_FILE_LOCKING
1794         {
1795                 .procname       = "lease-break-time",
1796                 .data           = &lease_break_time,
1797                 .maxlen         = sizeof(int),
1798                 .mode           = 0644,
1799                 .proc_handler   = proc_dointvec,
1800         },
1801 #endif
1802 #ifdef CONFIG_AIO
1803         {
1804                 .procname       = "aio-nr",
1805                 .data           = &aio_nr,
1806                 .maxlen         = sizeof(aio_nr),
1807                 .mode           = 0444,
1808                 .proc_handler   = proc_doulongvec_minmax,
1809         },
1810         {
1811                 .procname       = "aio-max-nr",
1812                 .data           = &aio_max_nr,
1813                 .maxlen         = sizeof(aio_max_nr),
1814                 .mode           = 0644,
1815                 .proc_handler   = proc_doulongvec_minmax,
1816         },
1817 #endif /* CONFIG_AIO */
1818 #ifdef CONFIG_INOTIFY_USER
1819         {
1820                 .procname       = "inotify",
1821                 .mode           = 0555,
1822                 .child          = inotify_table,
1823         },
1824 #endif  
1825 #ifdef CONFIG_EPOLL
1826         {
1827                 .procname       = "epoll",
1828                 .mode           = 0555,
1829                 .child          = epoll_table,
1830         },
1831 #endif
1832 #endif
1833         {
1834                 .procname       = "protected_symlinks",
1835                 .data           = &sysctl_protected_symlinks,
1836                 .maxlen         = sizeof(int),
1837                 .mode           = 0600,
1838                 .proc_handler   = proc_dointvec_minmax,
1839                 .extra1         = &zero,
1840                 .extra2         = &one,
1841         },
1842         {
1843                 .procname       = "protected_hardlinks",
1844                 .data           = &sysctl_protected_hardlinks,
1845                 .maxlen         = sizeof(int),
1846                 .mode           = 0600,
1847                 .proc_handler   = proc_dointvec_minmax,
1848                 .extra1         = &zero,
1849                 .extra2         = &one,
1850         },
1851         {
1852                 .procname       = "protected_fifos",
1853                 .data           = &sysctl_protected_fifos,
1854                 .maxlen         = sizeof(int),
1855                 .mode           = 0600,
1856                 .proc_handler   = proc_dointvec_minmax,
1857                 .extra1         = &zero,
1858                 .extra2         = &two,
1859         },
1860         {
1861                 .procname       = "protected_regular",
1862                 .data           = &sysctl_protected_regular,
1863                 .maxlen         = sizeof(int),
1864                 .mode           = 0600,
1865                 .proc_handler   = proc_dointvec_minmax,
1866                 .extra1         = &zero,
1867                 .extra2         = &two,
1868         },
1869         {
1870                 .procname       = "suid_dumpable",
1871                 .data           = &suid_dumpable,
1872                 .maxlen         = sizeof(int),
1873                 .mode           = 0644,
1874                 .proc_handler   = proc_dointvec_minmax_coredump,
1875                 .extra1         = &zero,
1876                 .extra2         = &two,
1877         },
1878 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1879         {
1880                 .procname       = "binfmt_misc",
1881                 .mode           = 0555,
1882                 .child          = sysctl_mount_point,
1883         },
1884 #endif
1885         {
1886                 .procname       = "pipe-max-size",
1887                 .data           = &pipe_max_size,
1888                 .maxlen         = sizeof(pipe_max_size),
1889                 .mode           = 0644,
1890                 .proc_handler   = proc_dopipe_max_size,
1891         },
1892         {
1893                 .procname       = "pipe-user-pages-hard",
1894                 .data           = &pipe_user_pages_hard,
1895                 .maxlen         = sizeof(pipe_user_pages_hard),
1896                 .mode           = 0644,
1897                 .proc_handler   = proc_doulongvec_minmax,
1898         },
1899         {
1900                 .procname       = "pipe-user-pages-soft",
1901                 .data           = &pipe_user_pages_soft,
1902                 .maxlen         = sizeof(pipe_user_pages_soft),
1903                 .mode           = 0644,
1904                 .proc_handler   = proc_doulongvec_minmax,
1905         },
1906         {
1907                 .procname       = "mount-max",
1908                 .data           = &sysctl_mount_max,
1909                 .maxlen         = sizeof(unsigned int),
1910                 .mode           = 0644,
1911                 .proc_handler   = proc_dointvec_minmax,
1912                 .extra1         = &one,
1913         },
1914         { }
1915 };
1916
1917 static struct ctl_table debug_table[] = {
1918 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1919         {
1920                 .procname       = "exception-trace",
1921                 .data           = &show_unhandled_signals,
1922                 .maxlen         = sizeof(int),
1923                 .mode           = 0644,
1924                 .proc_handler   = proc_dointvec
1925         },
1926 #endif
1927 #if defined(CONFIG_OPTPROBES)
1928         {
1929                 .procname       = "kprobes-optimization",
1930                 .data           = &sysctl_kprobes_optimization,
1931                 .maxlen         = sizeof(int),
1932                 .mode           = 0644,
1933                 .proc_handler   = proc_kprobes_optimization_handler,
1934                 .extra1         = &zero,
1935                 .extra2         = &one,
1936         },
1937 #endif
1938         { }
1939 };
1940
1941 static struct ctl_table dev_table[] = {
1942         { }
1943 };
1944
1945 int __init sysctl_init(void)
1946 {
1947         struct ctl_table_header *hdr;
1948
1949         hdr = register_sysctl_table(sysctl_base_table);
1950         kmemleak_not_leak(hdr);
1951         return 0;
1952 }
1953
1954 #endif /* CONFIG_SYSCTL */
1955
1956 /*
1957  * /proc/sys support
1958  */
1959
1960 #ifdef CONFIG_PROC_SYSCTL
1961
1962 static int _proc_do_string(char *data, int maxlen, int write,
1963                            char __user *buffer,
1964                            size_t *lenp, loff_t *ppos)
1965 {
1966         size_t len;
1967         char __user *p;
1968         char c;
1969
1970         if (!data || !maxlen || !*lenp) {
1971                 *lenp = 0;
1972                 return 0;
1973         }
1974
1975         if (write) {
1976                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1977                         /* Only continue writes not past the end of buffer. */
1978                         len = strlen(data);
1979                         if (len > maxlen - 1)
1980                                 len = maxlen - 1;
1981
1982                         if (*ppos > len)
1983                                 return 0;
1984                         len = *ppos;
1985                 } else {
1986                         /* Start writing from beginning of buffer. */
1987                         len = 0;
1988                 }
1989
1990                 *ppos += *lenp;
1991                 p = buffer;
1992                 while ((p - buffer) < *lenp && len < maxlen - 1) {
1993                         if (get_user(c, p++))
1994                                 return -EFAULT;
1995                         if (c == 0 || c == '\n')
1996                                 break;
1997                         data[len++] = c;
1998                 }
1999                 data[len] = 0;
2000         } else {
2001                 len = strlen(data);
2002                 if (len > maxlen)
2003                         len = maxlen;
2004
2005                 if (*ppos > len) {
2006                         *lenp = 0;
2007                         return 0;
2008                 }
2009
2010                 data += *ppos;
2011                 len  -= *ppos;
2012
2013                 if (len > *lenp)
2014                         len = *lenp;
2015                 if (len)
2016                         if (copy_to_user(buffer, data, len))
2017                                 return -EFAULT;
2018                 if (len < *lenp) {
2019                         if (put_user('\n', buffer + len))
2020                                 return -EFAULT;
2021                         len++;
2022                 }
2023                 *lenp = len;
2024                 *ppos += len;
2025         }
2026         return 0;
2027 }
2028
2029 static void warn_sysctl_write(struct ctl_table *table)
2030 {
2031         pr_warn_once("%s wrote to %s when file position was not 0!\n"
2032                 "This will not be supported in the future. To silence this\n"
2033                 "warning, set kernel.sysctl_writes_strict = -1\n",
2034                 current->comm, table->procname);
2035 }
2036
2037 /**
2038  * proc_first_pos_non_zero_ignore - check if first position is allowed
2039  * @ppos: file position
2040  * @table: the sysctl table
2041  *
2042  * Returns true if the first position is non-zero and the sysctl_writes_strict
2043  * mode indicates this is not allowed for numeric input types. String proc
2044  * handlers can ignore the return value.
2045  */
2046 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2047                                            struct ctl_table *table)
2048 {
2049         if (!*ppos)
2050                 return false;
2051
2052         switch (sysctl_writes_strict) {
2053         case SYSCTL_WRITES_STRICT:
2054                 return true;
2055         case SYSCTL_WRITES_WARN:
2056                 warn_sysctl_write(table);
2057                 return false;
2058         default:
2059                 return false;
2060         }
2061 }
2062
2063 /**
2064  * proc_dostring - read a string sysctl
2065  * @table: the sysctl table
2066  * @write: %TRUE if this is a write to the sysctl file
2067  * @buffer: the user buffer
2068  * @lenp: the size of the user buffer
2069  * @ppos: file position
2070  *
2071  * Reads/writes a string from/to the user buffer. If the kernel
2072  * buffer provided is not large enough to hold the string, the
2073  * string is truncated. The copied string is %NULL-terminated.
2074  * If the string is being read by the user process, it is copied
2075  * and a newline '\n' is added. It is truncated if the buffer is
2076  * not large enough.
2077  *
2078  * Returns 0 on success.
2079  */
2080 int proc_dostring(struct ctl_table *table, int write,
2081                   void __user *buffer, size_t *lenp, loff_t *ppos)
2082 {
2083         if (write)
2084                 proc_first_pos_non_zero_ignore(ppos, table);
2085
2086         return _proc_do_string((char *)(table->data), table->maxlen, write,
2087                                (char __user *)buffer, lenp, ppos);
2088 }
2089
/*
 * Advance *@buf to the position returned by skip_spaces() and return the
 * number of bytes that were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;
	char *end = skip_spaces(start);

	*buf = end;
	return end - start;
}
2098
/*
 * Skip a run of the character @v at the start of the buffer: *@buf is
 * advanced and *@size reduced by the number of leading bytes equal to @v.
 * At most *@size bytes are examined.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	char *cur = *buf;
	size_t remaining = *size;

	for (; remaining && *cur == v; remaining--)
		cur++;

	*buf = cur;
	*size = remaining;
}
2108
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are examined.  The magnitude is
 * parsed with simple_strtoul() using base 0 and stored in @val; the sign
 * is reported separately through @neg.
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * Returns -EINVAL if @size is zero, if no digit is found, if the number
 * fills the whole temporary buffer, or if the character following the
 * number is not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	/*
	 * Keep the length in a size_t: storing *size in an int would
	 * truncate large sizes, and a negative result would bypass the
	 * clamp below and reach memcpy() as a huge length.
	 */
	size_t len;
	char *p, tmp[TMPBUFLEN];

	if (!*size)
		return -EINVAL;

	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	/* Work on a NUL-terminated private copy of the input. */
	memcpy(tmp, *buf, len);
	tmp[len] = 0;

	p = tmp;
	/* A lone '-' is not a sign; it fails the digit check below. */
	if (*p == '-' && *size > 1) {
		*neg = true;
		p++;
	} else {
		*neg = false;
	}
	if (!isdigit(*p))
		return -EINVAL;

	*val = simple_strtoul(p, &p, 0);

	/* Number of bytes consumed, including the sign. */
	len = p - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
		return -EINVAL;

	if (tr && (len < *size))
		*tr = *p;

	*buf += len;
	*size -= len;

	return 0;
}
2173
2174 /**
2175  * proc_put_long - converts an integer to a decimal ASCII formatted string
2176  *
2177  * @buf: the user buffer
2178  * @size: the size of the user buffer
2179  * @val: the integer to be converted
2180  * @neg: sign of the number, %TRUE for negative
2181  *
2182  * In case of success %0 is returned and @buf and @size are updated with
2183  * the amount of bytes written.
2184  */
2185 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2186                           bool neg)
2187 {
2188         int len;
2189         char tmp[TMPBUFLEN], *p = tmp;
2190
2191         sprintf(p, "%s%lu", neg ? "-" : "", val);
2192         len = strlen(tmp);
2193         if (len > *size)
2194                 len = *size;
2195         if (copy_to_user(*buf, tmp, len))
2196                 return -EFAULT;
2197         *size -= len;
2198         *buf += len;
2199         return 0;
2200 }
2201 #undef TMPBUFLEN
2202
2203 static int proc_put_char(void __user **buf, size_t *size, char c)
2204 {
2205         if (*size) {
2206                 char __user **buffer = (char __user **)buf;
2207                 if (put_user(c, *buffer))
2208                         return -EFAULT;
2209                 (*size)--, (*buffer)++;
2210                 *buf = *buffer;
2211         }
2212         return 0;
2213 }
2214
/*
 * Default conversion between an int sysctl value and the sign/magnitude
 * pair used by the integer parser and printer.
 *
 * Write (@write non-zero): combines *@negp and *@lvalp into *@valp.
 * Returns -EINVAL if the magnitude does not fit: more than INT_MAX for a
 * positive number, more than INT_MAX + 1 for a negative one.
 *
 * Read (@write zero): splits *@valp into *@negp and *@lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	unsigned long magnitude, limit;
	int cur;

	if (!write) {
		cur = *valp;
		*negp = cur < 0;
		/* Negate as unsigned so that INT_MIN is representable. */
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	magnitude = *lvalp;
	/* The negative range reaches one further than the positive one. */
	limit = (unsigned long) INT_MAX + (*negp ? 1 : 0);
	if (magnitude > limit)
		return -EINVAL;

	*valp = *negp ? -magnitude : magnitude;
	return 0;
}
2241
/*
 * Default conversion between an unsigned int sysctl value and the
 * unsigned long used by the integer parser and printer.
 *
 * Write (@write non-zero): stores *@lvalp into *@valp, or returns -EINVAL
 * if it exceeds UINT_MAX.  Read (@write zero): widens *@valp into *@lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
2256
/*
 * Characters allowed to follow a number in the integer-vector parsers.
 * This is a plain character array, not a NUL-terminated string: users
 * pass sizeof(proc_wspace_sep) as the element count.
 */
static const char proc_wspace_sep[3] = { ' ', '\t', '\n' };
2258
2259 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2260                   int write, void __user *buffer,
2261                   size_t *lenp, loff_t *ppos,
2262                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2263                               int write, void *data),
2264                   void *data)
2265 {
2266         int *i, vleft, first = 1, err = 0;
2267         size_t left;
2268         char *kbuf = NULL, *p;
2269         
2270         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2271                 *lenp = 0;
2272                 return 0;
2273         }
2274         
2275         i = (int *) tbl_data;
2276         vleft = table->maxlen / sizeof(*i);
2277         left = *lenp;
2278
2279         if (!conv)
2280                 conv = do_proc_dointvec_conv;
2281
2282         if (write) {
2283                 if (proc_first_pos_non_zero_ignore(ppos, table))
2284                         goto out;
2285
2286                 if (left > PAGE_SIZE - 1)
2287                         left = PAGE_SIZE - 1;
2288                 p = kbuf = memdup_user_nul(buffer, left);
2289                 if (IS_ERR(kbuf))
2290                         return PTR_ERR(kbuf);
2291         }
2292
2293         for (; left && vleft--; i++, first=0) {
2294                 unsigned long lval;
2295                 bool neg;
2296
2297                 if (write) {
2298                         left -= proc_skip_spaces(&p);
2299
2300                         if (!left)
2301                                 break;
2302                         err = proc_get_long(&p, &left, &lval, &neg,
2303                                              proc_wspace_sep,
2304                                              sizeof(proc_wspace_sep), NULL);
2305                         if (err)
2306                                 break;
2307                         if (conv(&neg, &lval, i, 1, data)) {
2308                                 err = -EINVAL;
2309                                 break;
2310                         }
2311                 } else {
2312                         if (conv(&neg, &lval, i, 0, data)) {
2313                                 err = -EINVAL;
2314                                 break;
2315                         }
2316                         if (!first)
2317                                 err = proc_put_char(&buffer, &left, '\t');
2318                         if (err)
2319                                 break;
2320                         err = proc_put_long(&buffer, &left, lval, neg);
2321                         if (err)
2322                                 break;
2323                 }
2324         }
2325
2326         if (!write && !first && left && !err)
2327                 err = proc_put_char(&buffer, &left, '\n');
2328         if (write && !err && left)
2329                 left -= proc_skip_spaces(&p);
2330         if (write) {
2331                 kfree(kbuf);
2332                 if (first)
2333                         return err ? : -EINVAL;
2334         }
2335         *lenp -= left;
2336 out:
2337         *ppos += *lenp;
2338         return err;
2339 }
2340
2341 static int do_proc_dointvec(struct ctl_table *table, int write,
2342                   void __user *buffer, size_t *lenp, loff_t *ppos,
2343                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2344                               int write, void *data),
2345                   void *data)
2346 {
2347         return __do_proc_dointvec(table->data, table, write,
2348                         buffer, lenp, ppos, conv, data);
2349 }
2350
2351 static int do_proc_douintvec_w(unsigned int *tbl_data,
2352                                struct ctl_table *table,
2353                                void __user *buffer,
2354                                size_t *lenp, loff_t *ppos,
2355                                int (*conv)(unsigned long *lvalp,
2356                                            unsigned int *valp,
2357                                            int write, void *data),
2358                                void *data)
2359 {
2360         unsigned long lval;
2361         int err = 0;
2362         size_t left;
2363         bool neg;
2364         char *kbuf = NULL, *p;
2365
2366         left = *lenp;
2367
2368         if (proc_first_pos_non_zero_ignore(ppos, table))
2369                 goto bail_early;
2370
2371         if (left > PAGE_SIZE - 1)
2372                 left = PAGE_SIZE - 1;
2373
2374         p = kbuf = memdup_user_nul(buffer, left);
2375         if (IS_ERR(kbuf))
2376                 return -EINVAL;
2377
2378         left -= proc_skip_spaces(&p);
2379         if (!left) {
2380                 err = -EINVAL;
2381                 goto out_free;
2382         }
2383
2384         err = proc_get_long(&p, &left, &lval, &neg,
2385                              proc_wspace_sep,
2386                              sizeof(proc_wspace_sep), NULL);
2387         if (err || neg) {
2388                 err = -EINVAL;
2389                 goto out_free;
2390         }
2391
2392         if (conv(&lval, tbl_data, 1, data)) {
2393                 err = -EINVAL;
2394                 goto out_free;
2395         }
2396
2397         if (!err && left)
2398                 left -= proc_skip_spaces(&p);
2399
2400 out_free:
2401         kfree(kbuf);
2402         if (err)
2403                 return -EINVAL;
2404
2405         return 0;
2406
2407         /* This is in keeping with old __do_proc_dointvec() */
2408 bail_early:
2409         *ppos += *lenp;
2410         return err;
2411 }
2412
2413 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2414                                size_t *lenp, loff_t *ppos,
2415                                int (*conv)(unsigned long *lvalp,
2416                                            unsigned int *valp,
2417                                            int write, void *data),
2418                                void *data)
2419 {
2420         unsigned long lval;
2421         int err = 0;
2422         size_t left;
2423
2424         left = *lenp;
2425
2426         if (conv(&lval, tbl_data, 0, data)) {
2427                 err = -EINVAL;
2428                 goto out;
2429         }
2430
2431         err = proc_put_long(&buffer, &left, lval, false);
2432         if (err || !left)
2433                 goto out;
2434
2435         err = proc_put_char(&buffer, &left, '\n');
2436
2437 out:
2438         *lenp -= left;
2439         *ppos += *lenp;
2440
2441         return err;
2442 }
2443
2444 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2445                                int write, void __user *buffer,
2446                                size_t *lenp, loff_t *ppos,
2447                                int (*conv)(unsigned long *lvalp,
2448                                            unsigned int *valp,
2449                                            int write, void *data),
2450                                void *data)
2451 {
2452         unsigned int *i, vleft;
2453
2454         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2455                 *lenp = 0;
2456                 return 0;
2457         }
2458
2459         i = (unsigned int *) tbl_data;
2460         vleft = table->maxlen / sizeof(*i);
2461
2462         /*
2463          * Arrays are not supported, keep this simple. *Do not* add
2464          * support for them.
2465          */
2466         if (vleft != 1) {
2467                 *lenp = 0;
2468                 return -EINVAL;
2469         }
2470
2471         if (!conv)
2472                 conv = do_proc_douintvec_conv;
2473
2474         if (write)
2475                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2476                                            conv, data);
2477         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2478 }
2479
2480 static int do_proc_douintvec(struct ctl_table *table, int write,
2481                              void __user *buffer, size_t *lenp, loff_t *ppos,
2482                              int (*conv)(unsigned long *lvalp,
2483                                          unsigned int *valp,
2484                                          int write, void *data),
2485                              void *data)
2486 {
2487         return __do_proc_douintvec(table->data, table, write,
2488                                    buffer, lenp, ppos, conv, data);
2489 }
2490
2491 /**
2492  * proc_dointvec - read a vector of integers
2493  * @table: the sysctl table
2494  * @write: %TRUE if this is a write to the sysctl file
2495  * @buffer: the user buffer
2496  * @lenp: the size of the user buffer
2497  * @ppos: file position
2498  *
2499  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2500  * values from/to the user buffer, treated as an ASCII string. 
2501  *
2502  * Returns 0 on success.
2503  */
2504 int proc_dointvec(struct ctl_table *table, int write,
2505                      void __user *buffer, size_t *lenp, loff_t *ppos)
2506 {
2507         return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2508 }
2509
2510 /**
2511  * proc_douintvec - read a vector of unsigned integers
2512  * @table: the sysctl table
2513  * @write: %TRUE if this is a write to the sysctl file
2514  * @buffer: the user buffer
2515  * @lenp: the size of the user buffer
2516  * @ppos: file position
2517  *
2518  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2519  * values from/to the user buffer, treated as an ASCII string.
2520  *
2521  * Returns 0 on success.
2522  */
2523 int proc_douintvec(struct ctl_table *table, int write,
2524                      void __user *buffer, size_t *lenp, loff_t *ppos)
2525 {
2526         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2527                                  do_proc_douintvec_conv, NULL);
2528 }
2529
2530 /*
2531  * Taint values can only be increased
2532  * This means we can safely use a temporary.
2533  */
2534 static int proc_taint(struct ctl_table *table, int write,
2535                                void __user *buffer, size_t *lenp, loff_t *ppos)
2536 {
2537         struct ctl_table t;
2538         unsigned long tmptaint = get_taint();
2539         int err;
2540
2541         if (write && !capable(CAP_SYS_ADMIN))
2542                 return -EPERM;
2543
2544         t = *table;
2545         t.data = &tmptaint;
2546         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2547         if (err < 0)
2548                 return err;
2549
2550         if (write) {
2551                 /*
2552                  * Poor man's atomic or. Not worth adding a primitive
2553                  * to everyone's atomic.h for this
2554                  */
2555                 int i;
2556                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2557                         if ((tmptaint >> i) & 1)
2558                                 add_taint(i, LOCKDEP_STILL_OK);
2559                 }
2560         }
2561
2562         return err;
2563 }
2564
#ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() variant whose write path is restricted to
 * callers holding CAP_SYS_ADMIN; reads are passed straight through.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	/* capable() is only consulted for writes. */
	bool permitted = !write || capable(CAP_SYS_ADMIN);

	if (!permitted)
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
2575
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Converter for proc_dointvec_minmax().
 *
 * On write, the sign (@negp) and magnitude (@lvalp) parsed from user
 * space are combined into an int, checked against the optional bounds in
 * @data and stored through @valp.  A magnitude that does not fit in an
 * int is rejected up front: converting it first would silently truncate
 * it to some unrelated value that might then pass the range check.
 *
 * On read, *@valp is split back into sign and magnitude.
 *
 * Returns 0 on success or -EINVAL when the value overflows an int or
 * violates the bounds; *@valp is left untouched on failure.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		if (*negp) {
			/* INT_MIN has a magnitude one larger than INT_MAX. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2613
2614 /**
2615  * proc_dointvec_minmax - read a vector of integers with min/max values
2616  * @table: the sysctl table
2617  * @write: %TRUE if this is a write to the sysctl file
2618  * @buffer: the user buffer
2619  * @lenp: the size of the user buffer
2620  * @ppos: file position
2621  *
2622  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2623  * values from/to the user buffer, treated as an ASCII string.
2624  *
2625  * This routine will ensure the values are within the range specified by
2626  * table->extra1 (min) and table->extra2 (max).
2627  *
2628  * Returns 0 on success or -EINVAL on write when the range check fails.
2629  */
2630 int proc_dointvec_minmax(struct ctl_table *table, int write,
2631                   void __user *buffer, size_t *lenp, loff_t *ppos)
2632 {
2633         struct do_proc_dointvec_minmax_conv_param param = {
2634                 .min = (int *) table->extra1,
2635                 .max = (int *) table->extra2,
2636         };
2637         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2638                                 do_proc_dointvec_minmax_conv, &param);
2639 }
2640
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Converter for proc_douintvec_minmax().
 *
 * Read: widen *@valp into *@lvalp.
 * Write: reject values above UINT_MAX with -EINVAL, reject values outside
 * the optional bounds in @data with -ERANGE, otherwise store through
 * @valp.  @data is only dereferenced on the write path.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *limits = data;
	unsigned int candidate;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	candidate = *lvalp;

	if (limits->min && candidate < *limits->min)
		return -ERANGE;
	if (limits->max && candidate > *limits->max)
		return -ERANGE;

	*valp = candidate;
	return 0;
}
2679
2680 /**
2681  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2682  * @table: the sysctl table
2683  * @write: %TRUE if this is a write to the sysctl file
2684  * @buffer: the user buffer
2685  * @lenp: the size of the user buffer
2686  * @ppos: file position
2687  *
2688  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2689  * values from/to the user buffer, treated as an ASCII string. Negative
2690  * strings are not allowed.
2691  *
2692  * This routine will ensure the values are within the range specified by
2693  * table->extra1 (min) and table->extra2 (max). There is a final sanity
2694  * check for UINT_MAX to avoid having to support wrap around uses from
2695  * userspace.
2696  *
2697  * Returns 0 on success or -ERANGE on write when the range check fails.
2698  */
2699 int proc_douintvec_minmax(struct ctl_table *table, int write,
2700                           void __user *buffer, size_t *lenp, loff_t *ppos)
2701 {
2702         struct do_proc_douintvec_minmax_conv_param param = {
2703                 .min = (unsigned int *) table->extra1,
2704                 .max = (unsigned int *) table->extra2,
2705         };
2706         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2707                                  do_proc_douintvec_minmax_conv, &param);
2708 }
2709
2710 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
2711                                         unsigned int *valp,
2712                                         int write, void *data)
2713 {
2714         if (write) {
2715                 unsigned int val;
2716
2717                 val = round_pipe_size(*lvalp);
2718                 if (val == 0)
2719                         return -EINVAL;
2720
2721                 *valp = val;
2722         } else {
2723                 unsigned int val = *valp;
2724                 *lvalp = (unsigned long) val;
2725         }
2726
2727         return 0;
2728 }
2729
2730 static int proc_dopipe_max_size(struct ctl_table *table, int write,
2731                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2732 {
2733         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2734                                  do_proc_dopipe_max_size_conv, NULL);
2735 }
2736
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT but core_pattern is
 * neither an absolute path ('/') nor a pipe handler ('|').  Compiles to
 * an empty function without CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;

	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2750
2751 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2752                 void __user *buffer, size_t *lenp, loff_t *ppos)
2753 {
2754         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2755         if (!error)
2756                 validate_coredump_safety();
2757         return error;
2758 }
2759
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, when it succeeds, by a re-check of the core
 * dump configuration via validate_coredump_safety().
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dostring(table, write, buffer, lenp, ppos);

	if (ret)
		return ret;

	validate_coredump_safety();
	return 0;
}
#endif
2770
2771 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2772                                      void __user *buffer,
2773                                      size_t *lenp, loff_t *ppos,
2774                                      unsigned long convmul,
2775                                      unsigned long convdiv)
2776 {
2777         unsigned long *i, *min, *max;
2778         int vleft, first = 1, err = 0;
2779         size_t left;
2780         char *kbuf = NULL, *p;
2781
2782         if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2783                 *lenp = 0;
2784                 return 0;
2785         }
2786
2787         i = (unsigned long *) data;
2788         min = (unsigned long *) table->extra1;
2789         max = (unsigned long *) table->extra2;
2790         vleft = table->maxlen / sizeof(unsigned long);
2791         left = *lenp;
2792
2793         if (write) {
2794                 if (proc_first_pos_non_zero_ignore(ppos, table))
2795                         goto out;
2796
2797                 if (left > PAGE_SIZE - 1)
2798                         left = PAGE_SIZE - 1;
2799                 p = kbuf = memdup_user_nul(buffer, left);
2800                 if (IS_ERR(kbuf))
2801                         return PTR_ERR(kbuf);
2802         }
2803
2804         for (; left && vleft--; i++, first = 0) {
2805                 unsigned long val;
2806
2807                 if (write) {
2808                         bool neg;
2809
2810                         left -= proc_skip_spaces(&p);
2811                         if (!left)
2812                                 break;
2813
2814                         err = proc_get_long(&p, &left, &val, &neg,
2815                                              proc_wspace_sep,
2816                                              sizeof(proc_wspace_sep), NULL);
2817                         if (err)
2818                                 break;
2819                         if (neg)
2820                                 continue;
2821                         val = convmul * val / convdiv;
2822                         if ((min && val < *min) || (max && val > *max))
2823                                 continue;
2824                         *i = val;
2825                 } else {
2826                         val = convdiv * (*i) / convmul;
2827                         if (!first) {
2828                                 err = proc_put_char(&buffer, &left, '\t');
2829                                 if (err)
2830                                         break;
2831                         }
2832                         err = proc_put_long(&buffer, &left, val, false);
2833                         if (err)
2834                                 break;
2835                 }
2836         }
2837
2838         if (!write && !first && left && !err)
2839                 err = proc_put_char(&buffer, &left, '\n');
2840         if (write && !err)
2841                 left -= proc_skip_spaces(&p);
2842         if (write) {
2843                 kfree(kbuf);
2844                 if (first)
2845                         return err ? : -EINVAL;
2846         }
2847         *lenp -= left;
2848 out:
2849         *ppos += *lenp;
2850         return err;
2851 }
2852
2853 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2854                                      void __user *buffer,
2855                                      size_t *lenp, loff_t *ppos,
2856                                      unsigned long convmul,
2857                                      unsigned long convdiv)
2858 {
2859         return __do_proc_doulongvec_minmax(table->data, table, write,
2860                         buffer, lenp, ppos, convmul, convdiv);
2861 }
2862
2863 /**
2864  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2865  * @table: the sysctl table
2866  * @write: %TRUE if this is a write to the sysctl file
2867  * @buffer: the user buffer
2868  * @lenp: the size of the user buffer
2869  * @ppos: file position
2870  *
2871  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2872  * values from/to the user buffer, treated as an ASCII string.
2873  *
2874  * This routine will ensure the values are within the range specified by
2875  * table->extra1 (min) and table->extra2 (max).
2876  *
2877  * Returns 0 on success.
2878  */
2879 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2880                            void __user *buffer, size_t *lenp, loff_t *ppos)
2881 {
2882     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2883 }
2884
2885 /**
2886  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2887  * @table: the sysctl table
2888  * @write: %TRUE if this is a write to the sysctl file
2889  * @buffer: the user buffer
2890  * @lenp: the size of the user buffer
2891  * @ppos: file position
2892  *
2893  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2894  * values from/to the user buffer, treated as an ASCII string. The values
2895  * are treated as milliseconds, and converted to jiffies when they are stored.
2896  *
2897  * This routine will ensure the values are within the range specified by
2898  * table->extra1 (min) and table->extra2 (max).
2899  *
2900  * Returns 0 on success.
2901  */
2902 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2903                                       void __user *buffer,
2904                                       size_t *lenp, loff_t *ppos)
2905 {
2906     return do_proc_doulongvec_minmax(table, write, buffer,
2907                                      lenp, ppos, HZ, 1000l);
2908 }
2909
2910
2911 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2912                                          int *valp,
2913                                          int write, void *data)
2914 {
2915         if (write) {
2916                 if (*lvalp > INT_MAX / HZ)
2917                         return 1;
2918                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2919         } else {
2920                 int val = *valp;
2921                 unsigned long lval;
2922                 if (val < 0) {
2923                         *negp = true;
2924                         lval = -(unsigned long)val;
2925                 } else {
2926                         *negp = false;
2927                         lval = (unsigned long)val;
2928                 }
2929                 *lvalp = lval / HZ;
2930         }
2931         return 0;
2932 }
2933
2934 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2935                                                 int *valp,
2936                                                 int write, void *data)
2937 {
2938         if (write) {
2939                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2940                         return 1;
2941                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2942         } else {
2943                 int val = *valp;
2944                 unsigned long lval;
2945                 if (val < 0) {
2946                         *negp = true;
2947                         lval = -(unsigned long)val;
2948                 } else {
2949                         *negp = false;
2950                         lval = (unsigned long)val;
2951                 }
2952                 *lvalp = jiffies_to_clock_t(lval);
2953         }
2954         return 0;
2955 }
2956
2957 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2958                                             int *valp,
2959                                             int write, void *data)
2960 {
2961         if (write) {
2962                 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
2963
2964                 if (jif > INT_MAX)
2965                         return 1;
2966                 *valp = (int)jif;
2967         } else {
2968                 int val = *valp;
2969                 unsigned long lval;
2970                 if (val < 0) {
2971                         *negp = true;
2972                         lval = -(unsigned long)val;
2973                 } else {
2974                         *negp = false;
2975                         lval = (unsigned long)val;
2976                 }
2977                 *lvalp = jiffies_to_msecs(lval);
2978         }
2979         return 0;
2980 }
2981
2982 /**
2983  * proc_dointvec_jiffies - read a vector of integers as seconds
2984  * @table: the sysctl table
2985  * @write: %TRUE if this is a write to the sysctl file
2986  * @buffer: the user buffer
2987  * @lenp: the size of the user buffer
2988  * @ppos: file position
2989  *
2990  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2991  * values from/to the user buffer, treated as an ASCII string. 
2992  * The values read are assumed to be in seconds, and are converted into
2993  * jiffies.
2994  *
2995  * Returns 0 on success.
2996  */
2997 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2998                           void __user *buffer, size_t *lenp, loff_t *ppos)
2999 {
3000     return do_proc_dointvec(table,write,buffer,lenp,ppos,
3001                             do_proc_dointvec_jiffies_conv,NULL);
3002 }
3003
3004 /**
3005  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
3006  * @table: the sysctl table
3007  * @write: %TRUE if this is a write to the sysctl file
3008  * @buffer: the user buffer
3009  * @lenp: the size of the user buffer
3010  * @ppos: pointer to the file position
3011  *
3012  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3013  * values from/to the user buffer, treated as an ASCII string. 
3014  * The values read are assumed to be in 1/USER_HZ seconds, and 
3015  * are converted into jiffies.
3016  *
3017  * Returns 0 on success.
3018  */
3019 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3020                                  void __user *buffer, size_t *lenp, loff_t *ppos)
3021 {
3022     return do_proc_dointvec(table,write,buffer,lenp,ppos,
3023                             do_proc_dointvec_userhz_jiffies_conv,NULL);
3024 }
3025
3026 /**
3027  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3028  * @table: the sysctl table
3029  * @write: %TRUE if this is a write to the sysctl file
3030  * @buffer: the user buffer
3031  * @lenp: the size of the user buffer
3032  * @ppos: file position
3033  * @ppos: the current position in the file
3034  *
3035  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3036  * values from/to the user buffer, treated as an ASCII string. 
3037  * The values read are assumed to be in 1/1000 seconds, and 
3038  * are converted into jiffies.
3039  *
3040  * Returns 0 on success.
3041  */
3042 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3043                              void __user *buffer, size_t *lenp, loff_t *ppos)
3044 {
3045         return do_proc_dointvec(table, write, buffer, lenp, ppos,
3046                                 do_proc_dointvec_ms_jiffies_conv, NULL);
3047 }
3048
3049 static int proc_do_cad_pid(struct ctl_table *table, int write,
3050                            void __user *buffer, size_t *lenp, loff_t *ppos)
3051 {
3052         struct pid *new_pid;
3053         pid_t tmp;
3054         int r;
3055
3056         tmp = pid_vnr(cad_pid);
3057
3058         r = __do_proc_dointvec(&tmp, table, write, buffer,
3059                                lenp, ppos, NULL, NULL);
3060         if (r || !write)
3061                 return r;
3062
3063         new_pid = find_get_pid(tmp);
3064         if (!new_pid)
3065                 return -ESRCH;
3066
3067         put_pid(xchg(&cad_pid, new_pid));
3068         return 0;
3069 }
3070
3071 /**
3072  * proc_do_large_bitmap - read/write from/to a large bitmap
3073  * @table: the sysctl table
3074  * @write: %TRUE if this is a write to the sysctl file
3075  * @buffer: the user buffer
3076  * @lenp: the size of the user buffer
3077  * @ppos: file position
3078  *
3079  * The bitmap is stored at table->data and the bitmap length (in bits)
3080  * in table->maxlen.
3081  *
3082  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3083  * large bitmaps may be represented in a compact manner. Writing into
3084  * the file will clear the bitmap then update it with the given input.
3085  *
3086  * Returns 0 on success.
3087  */
3088 int proc_do_large_bitmap(struct ctl_table *table, int write,
3089                          void __user *buffer, size_t *lenp, loff_t *ppos)
3090 {
3091         int err = 0;
3092         bool first = 1;
3093         size_t left = *lenp;
3094         unsigned long bitmap_len = table->maxlen;
3095         unsigned long *bitmap = *(unsigned long **) table->data;
3096         unsigned long *tmp_bitmap = NULL;
3097         char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3098
3099         if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3100                 *lenp = 0;
3101                 return 0;
3102         }
3103
3104         if (write) {
3105                 char *kbuf, *p;
3106
3107                 if (left > PAGE_SIZE - 1)
3108                         left = PAGE_SIZE - 1;
3109
3110                 p = kbuf = memdup_user_nul(buffer, left);
3111                 if (IS_ERR(kbuf))
3112                         return PTR_ERR(kbuf);
3113
3114                 tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3115                                      sizeof(unsigned long),
3116                                      GFP_KERNEL);
3117                 if (!tmp_bitmap) {
3118                         kfree(kbuf);
3119                         return -ENOMEM;
3120                 }
3121                 proc_skip_char(&p, &left, '\n');
3122                 while (!err && left) {
3123                         unsigned long val_a, val_b;
3124                         bool neg;
3125
3126                         err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3127                                              sizeof(tr_a), &c);
3128                         if (err)
3129                                 break;
3130                         if (val_a >= bitmap_len || neg) {
3131                                 err = -EINVAL;
3132                                 break;
3133                         }
3134
3135                         val_b = val_a;
3136                         if (left) {
3137                                 p++;
3138                                 left--;
3139                         }
3140
3141                         if (c == '-') {
3142                                 err = proc_get_long(&p, &left, &val_b,
3143                                                      &neg, tr_b, sizeof(tr_b),
3144                                                      &c);
3145                                 if (err)
3146                                         break;
3147                                 if (val_b >= bitmap_len || neg ||
3148                                     val_a > val_b) {
3149                                         err = -EINVAL;
3150                                         break;
3151                                 }
3152                                 if (left) {
3153                                         p++;
3154                                         left--;
3155                                 }
3156                         }
3157
3158                         bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3159                         first = 0;
3160                         proc_skip_char(&p, &left, '\n');
3161                 }
3162                 kfree(kbuf);
3163         } else {
3164                 unsigned long bit_a, bit_b = 0;
3165
3166                 while (left) {
3167                         bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3168                         if (bit_a >= bitmap_len)
3169                                 break;
3170                         bit_b = find_next_zero_bit(bitmap, bitmap_len,
3171                                                    bit_a + 1) - 1;
3172
3173                         if (!first) {
3174                                 err = proc_put_char(&buffer, &left, ',');
3175                                 if (err)
3176                                         break;
3177                         }
3178                         err = proc_put_long(&buffer, &left, bit_a, false);
3179                         if (err)
3180                                 break;
3181                         if (bit_a != bit_b) {
3182                                 err = proc_put_char(&buffer, &left, '-');
3183                                 if (err)
3184                                         break;
3185                                 err = proc_put_long(&buffer, &left, bit_b, false);
3186                                 if (err)
3187                                         break;
3188                         }
3189
3190                         first = 0; bit_b++;
3191                 }
3192                 if (!err)
3193                         err = proc_put_char(&buffer, &left, '\n');
3194         }
3195
3196         if (!err) {
3197                 if (write) {
3198                         if (*ppos)
3199                                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3200                         else
3201                                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3202                 }
3203                 *lenp -= left;
3204                 *ppos += *lenp;
3205         }
3206
3207         kfree(tmp_bitmap);
3208         return err;
3209 }
3210
3211 #else /* CONFIG_PROC_SYSCTL */
3212
3213 int proc_dostring(struct ctl_table *table, int write,
3214                   void __user *buffer, size_t *lenp, loff_t *ppos)
3215 {
3216         return -ENOSYS;
3217 }
3218
3219 int proc_dointvec(struct ctl_table *table, int write,
3220                   void __user *buffer, size_t *lenp, loff_t *ppos)
3221 {
3222         return -ENOSYS;
3223 }
3224
3225 int proc_douintvec(struct ctl_table *table, int write,
3226                   void __user *buffer, size_t *lenp, loff_t *ppos)
3227 {
3228         return -ENOSYS;
3229 }
3230
3231 int proc_dointvec_minmax(struct ctl_table *table, int write,
3232                     void __user *buffer, size_t *lenp, loff_t *ppos)
3233 {
3234         return -ENOSYS;
3235 }
3236
3237 int proc_douintvec_minmax(struct ctl_table *table, int write,
3238                           void __user *buffer, size_t *lenp, loff_t *ppos)
3239 {
3240         return -ENOSYS;
3241 }
3242
3243 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3244                     void __user *buffer, size_t *lenp, loff_t *ppos)
3245 {
3246         return -ENOSYS;
3247 }
3248
3249 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3250                     void __user *buffer, size_t *lenp, loff_t *ppos)
3251 {
3252         return -ENOSYS;
3253 }
3254
3255 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3256                              void __user *buffer, size_t *lenp, loff_t *ppos)
3257 {
3258         return -ENOSYS;
3259 }
3260
3261 int proc_doulongvec_minmax(struct ctl_table *table, int write,
3262                     void __user *buffer, size_t *lenp, loff_t *ppos)
3263 {
3264         return -ENOSYS;
3265 }
3266
3267 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3268                                       void __user *buffer,
3269                                       size_t *lenp, loff_t *ppos)
3270 {
3271     return -ENOSYS;
3272 }
3273
3274
3275 #endif /* CONFIG_PROC_SYSCTL */
3276
3277 #ifdef CONFIG_BPF_SYSCALL
3278 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
3279                                           void __user *buffer, size_t *lenp,
3280                                           loff_t *ppos)
3281 {
3282         int ret, bpf_stats = *(int *)table->data;
3283         struct ctl_table tmp = *table;
3284
3285         if (write && !capable(CAP_SYS_ADMIN))
3286                 return -EPERM;
3287
3288         tmp.data = &bpf_stats;
3289         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3290         if (write && !ret) {
3291                 *(int *)table->data = bpf_stats;
3292                 if (bpf_stats)
3293                         static_branch_enable(&bpf_stats_enabled_key);
3294                 else
3295                         static_branch_disable(&bpf_stats_enabled_key);
3296         }
3297         return ret;
3298 }
3299 #endif
/*
 * Each of these handlers is defined twice (once for real and once as a
 * stub), so there is no sense repeating the export after every
 * definition; they are grouped here instead, exception granted :-)
 */
3304 EXPORT_SYMBOL(proc_dointvec);
3