1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 * Copyright (c) 2016 Facebook
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 #include <linux/kernel.h>
14 #include <linux/types.h>
15 #include <linux/slab.h>
16 #include <linux/bpf.h>
17 #include <linux/bpf_verifier.h>
18 #include <linux/filter.h>
19 #include <net/netlink.h>
20 #include <linux/file.h>
21 #include <linux/vmalloc.h>
22 #include <linux/stringify.h>
23 #include <linux/bsearch.h>
24 #include <linux/sort.h>
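/* Table of per-program-type verifier callbacks, built via the BPF_PROG_TYPE()
 * x-macro below: each BPF_PROG_TYPE(_id, _name) entry in linux/bpf_types.h
 * expands to [_id] = &_name##_verifier_ops, so e.g. the socket filter entry
 * becomes [BPF_PROG_TYPE_SOCKET_FILTER] = &sk_filter_verifier_ops.
 */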
28 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
29 #define BPF_PROG_TYPE(_id, _name) \
30 [_id] = & _name ## _verifier_ops,
31 #define BPF_MAP_TYPE(_id, _ops)
32 #include <linux/bpf_types.h>
/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
 * types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one such argument constraint.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either a pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to SCALAR_VALUE in the
 * false branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 */
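/* As a hedged illustration (not part of this file): a program that uses the
 * looked-up value must null-check R0 before dereferencing it, otherwise
 * check_mem_access() rejects the load. 'map_fd' below is an assumed valid
 * map file descriptor for a map with a 4-byte key and at least a 4-byte value:
 *
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
 *    BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),            // initialize the key on the stack
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),         // skip the load if R0 == NULL
 *    BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),   // safe: R0 is PTR_TO_MAP_VALUE here
 *    BPF_EXIT_INSN(),
 */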
144 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
145 struct bpf_verifier_stack_elem {
/* verifier state is 'st'
147 * before processing instruction 'insn_idx'
148 * and after processing instruction 'prev_insn_idx'
150 struct bpf_verifier_state st;
153 struct bpf_verifier_stack_elem *next;
156 #define BPF_COMPLEXITY_LIMIT_INSNS 131072
157 #define BPF_COMPLEXITY_LIMIT_STACK 1024
159 #define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
161 struct bpf_call_arg_meta {
162 struct bpf_map *map_ptr;
169 static DEFINE_MUTEX(bpf_verifier_lock);
171 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
176 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
178 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
179 "verifier log line truncated - local buffer too short\n");
181 n = min(log->len_total - log->len_used - 1, n);
184 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
190 /* log_level controls verbosity level of eBPF verifier.
191 * bpf_verifier_log_write() is used to dump the verification trace to the log,
192 * so the user can figure out what's wrong with the program
194 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
195 const char *fmt, ...)
199 if (!bpf_verifier_log_needed(&env->log))
203 bpf_verifier_vlog(&env->log, fmt, args);
206 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
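/* verbose() is the verifier's own logging helper. Its first argument is a
 * void *private_data carrying the bpf_verifier_env (which also lets the same
 * signature double as a generic print callback), and the message is forwarded
 * to bpf_verifier_vlog() just like bpf_verifier_log_write() above.
 */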
208 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
210 struct bpf_verifier_env *env = private_data;
213 if (!bpf_verifier_log_needed(&env->log))
217 bpf_verifier_vlog(&env->log, fmt, args);
221 static bool type_is_pkt_pointer(enum bpf_reg_type type)
223 return type == PTR_TO_PACKET ||
224 type == PTR_TO_PACKET_META;
227 /* string representation of 'enum bpf_reg_type' */
228 static const char * const reg_type_str[] = {
230 [SCALAR_VALUE] = "inv",
231 [PTR_TO_CTX] = "ctx",
232 [CONST_PTR_TO_MAP] = "map_ptr",
233 [PTR_TO_MAP_VALUE] = "map_value",
234 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
235 [PTR_TO_STACK] = "fp",
236 [PTR_TO_PACKET] = "pkt",
237 [PTR_TO_PACKET_META] = "pkt_meta",
238 [PTR_TO_PACKET_END] = "pkt_end",
241 static void print_liveness(struct bpf_verifier_env *env,
242 enum bpf_reg_liveness live)
244 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
246 if (live & REG_LIVE_READ)
248 if (live & REG_LIVE_WRITTEN)
252 static struct bpf_func_state *func(struct bpf_verifier_env *env,
253 const struct bpf_reg_state *reg)
255 struct bpf_verifier_state *cur = env->cur_state;
257 return cur->frame[reg->frameno];
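/* print_verifier_state() dumps the registers and stack slots of one frame.
 * An illustrative log line produced by it looks roughly like:
 *    R1=ctx(id=0,off=0,imm=0) R6_w=inv0 R10=fp0,call_-1 fp-8=map_value
 * where the "_r"/"_w" suffixes are the liveness marks from print_liveness().
 */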
260 static void print_verifier_state(struct bpf_verifier_env *env,
261 const struct bpf_func_state *state)
263 const struct bpf_reg_state *reg;
268 verbose(env, " frame%d:", state->frameno);
269 for (i = 0; i < MAX_BPF_REG; i++) {
270 reg = &state->regs[i];
274 verbose(env, " R%d", i);
275 print_liveness(env, reg->live);
276 verbose(env, "=%s", reg_type_str[t]);
277 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
278 tnum_is_const(reg->var_off)) {
279 /* reg->off should be 0 for SCALAR_VALUE */
280 verbose(env, "%lld", reg->var_off.value + reg->off);
281 if (t == PTR_TO_STACK)
282 verbose(env, ",call_%d", func(env, reg)->callsite);
284 verbose(env, "(id=%d", reg->id);
285 if (t != SCALAR_VALUE)
286 verbose(env, ",off=%d", reg->off);
287 if (type_is_pkt_pointer(t))
288 verbose(env, ",r=%d", reg->range);
289 else if (t == CONST_PTR_TO_MAP ||
290 t == PTR_TO_MAP_VALUE ||
291 t == PTR_TO_MAP_VALUE_OR_NULL)
292 verbose(env, ",ks=%d,vs=%d",
293 reg->map_ptr->key_size,
294 reg->map_ptr->value_size);
295 if (tnum_is_const(reg->var_off)) {
296 /* Typically an immediate SCALAR_VALUE, but
297 * could be a pointer whose offset is too big
300 verbose(env, ",imm=%llx", reg->var_off.value);
302 if (reg->smin_value != reg->umin_value &&
303 reg->smin_value != S64_MIN)
304 verbose(env, ",smin_value=%lld",
305 (long long)reg->smin_value);
306 if (reg->smax_value != reg->umax_value &&
307 reg->smax_value != S64_MAX)
308 verbose(env, ",smax_value=%lld",
309 (long long)reg->smax_value);
310 if (reg->umin_value != 0)
311 verbose(env, ",umin_value=%llu",
312 (unsigned long long)reg->umin_value);
313 if (reg->umax_value != U64_MAX)
314 verbose(env, ",umax_value=%llu",
315 (unsigned long long)reg->umax_value);
316 if (!tnum_is_unknown(reg->var_off)) {
319 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
320 verbose(env, ",var_off=%s", tn_buf);
326 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
327 if (state->stack[i].slot_type[0] == STACK_SPILL) {
328 verbose(env, " fp%d",
329 (-i - 1) * BPF_REG_SIZE);
330 print_liveness(env, state->stack[i].spilled_ptr.live);
332 reg_type_str[state->stack[i].spilled_ptr.type]);
334 if (state->stack[i].slot_type[0] == STACK_ZERO)
335 verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
340 static int copy_stack_state(struct bpf_func_state *dst,
341 const struct bpf_func_state *src)
345 if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
346 /* internal bug, make state invalid to reject the program */
347 memset(dst, 0, sizeof(*dst));
350 memcpy(dst->stack, src->stack,
351 sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
355 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 * make it consume a minimal amount of memory. check_stack_write() accesses from
 * the program call into realloc_func_state() to grow the stack size.
 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 * which this function copies over. It points to the previous bpf_verifier_state,
 * which is never reallocated
362 static int realloc_func_state(struct bpf_func_state *state, int size,
365 u32 old_size = state->allocated_stack;
366 struct bpf_stack_state *new_stack;
367 int slot = size / BPF_REG_SIZE;
369 if (size <= old_size || !size) {
372 state->allocated_stack = slot * BPF_REG_SIZE;
373 if (!size && old_size) {
379 new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
385 memcpy(new_stack, state->stack,
386 sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
387 memset(new_stack + old_size / BPF_REG_SIZE, 0,
388 sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
390 state->allocated_stack = slot * BPF_REG_SIZE;
392 state->stack = new_stack;
396 static void free_func_state(struct bpf_func_state *state)
404 static void free_verifier_state(struct bpf_verifier_state *state,
409 for (i = 0; i <= state->curframe; i++) {
410 free_func_state(state->frame[i]);
411 state->frame[i] = NULL;
417 /* copy verifier state from src to dst growing dst stack space
418 * when necessary to accommodate larger src stack
420 static int copy_func_state(struct bpf_func_state *dst,
421 const struct bpf_func_state *src)
425 err = realloc_func_state(dst, src->allocated_stack, false);
428 memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
429 return copy_stack_state(dst, src);
432 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
433 const struct bpf_verifier_state *src)
435 struct bpf_func_state *dst;
/* if dst has more stack frames than src frame, free them */
439 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
440 free_func_state(dst_state->frame[i]);
441 dst_state->frame[i] = NULL;
443 dst_state->curframe = src->curframe;
444 dst_state->parent = src->parent;
445 for (i = 0; i <= src->curframe; i++) {
446 dst = dst_state->frame[i];
448 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
451 dst_state->frame[i] = dst;
453 err = copy_func_state(dst, src->frame[i]);
460 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
463 struct bpf_verifier_state *cur = env->cur_state;
464 struct bpf_verifier_stack_elem *elem, *head = env->head;
467 if (env->head == NULL)
471 err = copy_verifier_state(cur, &head->st);
476 *insn_idx = head->insn_idx;
478 *prev_insn_idx = head->prev_insn_idx;
480 free_verifier_state(&head->st, false);
487 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
488 int insn_idx, int prev_insn_idx)
490 struct bpf_verifier_state *cur = env->cur_state;
491 struct bpf_verifier_stack_elem *elem;
494 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
498 elem->insn_idx = insn_idx;
499 elem->prev_insn_idx = prev_insn_idx;
500 elem->next = env->head;
503 err = copy_verifier_state(&elem->st, cur);
506 if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
507 verbose(env, "BPF program is too complex\n");
512 free_verifier_state(env->cur_state, true);
513 env->cur_state = NULL;
514 /* pop all elements and return */
515 while (!pop_stack(env, NULL, NULL));
519 #define CALLER_SAVED_REGS 6
520 static const int caller_saved[CALLER_SAVED_REGS] = {
521 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
524 static void __mark_reg_not_init(struct bpf_reg_state *reg);
526 /* Mark the unknown part of a register (variable offset or scalar value) as
527 * known to have the value @imm.
529 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
532 reg->var_off = tnum_const(imm);
533 reg->smin_value = (s64)imm;
534 reg->smax_value = (s64)imm;
535 reg->umin_value = imm;
536 reg->umax_value = imm;
539 /* Mark the 'variable offset' part of a register as zero. This should be
540 * used only on registers holding a pointer type.
542 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
544 __mark_reg_known(reg, 0);
547 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
549 __mark_reg_known(reg, 0);
551 reg->type = SCALAR_VALUE;
554 static void mark_reg_known_zero(struct bpf_verifier_env *env,
555 struct bpf_reg_state *regs, u32 regno)
557 if (WARN_ON(regno >= MAX_BPF_REG)) {
558 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
559 /* Something bad happened, let's kill all regs */
560 for (regno = 0; regno < MAX_BPF_REG; regno++)
561 __mark_reg_not_init(regs + regno);
564 __mark_reg_known_zero(regs + regno);
567 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
569 return type_is_pkt_pointer(reg->type);
572 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
574 return reg_is_pkt_pointer(reg) ||
575 reg->type == PTR_TO_PACKET_END;
578 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
579 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
580 enum bpf_reg_type which)
582 /* The register can already have a range from prior markings.
583 * This is fine as long as it hasn't been advanced from its
586 return reg->type == which &&
589 tnum_equals_const(reg->var_off, 0);
592 /* Attempts to improve min/max values based on var_off information */
593 static void __update_reg_bounds(struct bpf_reg_state *reg)
595 /* min signed is max(sign bit) | min(other bits) */
596 reg->smin_value = max_t(s64, reg->smin_value,
597 reg->var_off.value | (reg->var_off.mask & S64_MIN));
598 /* max signed is min(sign bit) | max(other bits) */
599 reg->smax_value = min_t(s64, reg->smax_value,
600 reg->var_off.value | (reg->var_off.mask & S64_MAX));
601 reg->umin_value = max(reg->umin_value, reg->var_off.value);
602 reg->umax_value = min(reg->umax_value,
603 reg->var_off.value | reg->var_off.mask);
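/* __update_reg_bounds() example: with var_off = (value=0x0, mask=0xff), i.e.
 * only the low byte unknown, the updates above tighten the bounds to
 * umin_value >= 0x0 and umax_value <= 0xff (and smin/smax accordingly).
 */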
606 /* Uses signed min/max values to inform unsigned, and vice-versa */
607 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
609 /* Learn sign from signed bounds.
610 * If we cannot cross the sign boundary, then signed and unsigned bounds
611 * are the same, so combine. This works even in the negative case, e.g.
612 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
614 if (reg->smin_value >= 0 || reg->smax_value < 0) {
615 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
617 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
621 /* Learn sign from unsigned bounds. Signed bounds cross the sign
622 * boundary, so we must be careful.
624 if ((s64)reg->umax_value >= 0) {
625 /* Positive. We can't learn anything from the smin, but smax
626 * is positive, hence safe.
628 reg->smin_value = reg->umin_value;
629 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
631 } else if ((s64)reg->umin_value < 0) {
632 /* Negative. We can't learn anything from the smax, but smin
633 * is negative, hence safe.
635 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
637 reg->smax_value = reg->umax_value;
641 /* Attempts to improve var_off based on unsigned min/max information */
642 static void __reg_bound_offset(struct bpf_reg_state *reg)
644 reg->var_off = tnum_intersect(reg->var_off,
645 tnum_range(reg->umin_value,
649 /* Reset the min/max bounds of a register */
650 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
652 reg->smin_value = S64_MIN;
653 reg->smax_value = S64_MAX;
655 reg->umax_value = U64_MAX;
658 /* Mark a register as having a completely unknown (scalar) value. */
659 static void __mark_reg_unknown(struct bpf_reg_state *reg)
661 reg->type = SCALAR_VALUE;
664 reg->var_off = tnum_unknown;
666 __mark_reg_unbounded(reg);
669 static void mark_reg_unknown(struct bpf_verifier_env *env,
670 struct bpf_reg_state *regs, u32 regno)
672 if (WARN_ON(regno >= MAX_BPF_REG)) {
673 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
674 /* Something bad happened, let's kill all regs except FP */
675 for (regno = 0; regno < BPF_REG_FP; regno++)
676 __mark_reg_not_init(regs + regno);
679 __mark_reg_unknown(regs + regno);
682 static void __mark_reg_not_init(struct bpf_reg_state *reg)
684 __mark_reg_unknown(reg);
685 reg->type = NOT_INIT;
688 static void mark_reg_not_init(struct bpf_verifier_env *env,
689 struct bpf_reg_state *regs, u32 regno)
691 if (WARN_ON(regno >= MAX_BPF_REG)) {
692 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
693 /* Something bad happened, let's kill all regs except FP */
694 for (regno = 0; regno < BPF_REG_FP; regno++)
695 __mark_reg_not_init(regs + regno);
698 __mark_reg_not_init(regs + regno);
701 static void init_reg_state(struct bpf_verifier_env *env,
702 struct bpf_func_state *state)
704 struct bpf_reg_state *regs = state->regs;
707 for (i = 0; i < MAX_BPF_REG; i++) {
708 mark_reg_not_init(env, regs, i);
709 regs[i].live = REG_LIVE_NONE;
713 regs[BPF_REG_FP].type = PTR_TO_STACK;
714 mark_reg_known_zero(env, regs, BPF_REG_FP);
715 regs[BPF_REG_FP].frameno = state->frameno;
717 /* 1st arg to a function */
718 regs[BPF_REG_1].type = PTR_TO_CTX;
719 mark_reg_known_zero(env, regs, BPF_REG_1);
722 #define BPF_MAIN_FUNC (-1)
723 static void init_func_state(struct bpf_verifier_env *env,
724 struct bpf_func_state *state,
725 int callsite, int frameno, int subprogno)
727 state->callsite = callsite;
728 state->frameno = frameno;
729 state->subprogno = subprogno;
730 init_reg_state(env, state);
734 SRC_OP, /* register is used as source operand */
735 DST_OP, /* register is used as destination operand */
736 DST_OP_NO_MARK /* same as above, check only, don't mark */
739 static int cmp_subprogs(const void *a, const void *b)
741 return *(int *)a - *(int *)b;
744 static int find_subprog(struct bpf_verifier_env *env, int off)
748 p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
749 sizeof(env->subprog_starts[0]), cmp_subprogs);
752 return p - env->subprog_starts;
756 static int add_subprog(struct bpf_verifier_env *env, int off)
758 int insn_cnt = env->prog->len;
761 if (off >= insn_cnt || off < 0) {
762 verbose(env, "call to invalid destination\n");
765 ret = find_subprog(env, off);
768 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
769 verbose(env, "too many subprograms\n");
772 env->subprog_starts[env->subprog_cnt++] = off;
773 sort(env->subprog_starts, env->subprog_cnt,
774 sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
778 static int check_subprogs(struct bpf_verifier_env *env)
780 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
781 struct bpf_insn *insn = env->prog->insnsi;
782 int insn_cnt = env->prog->len;
784 /* determine subprog starts. The end is one before the next starts */
785 for (i = 0; i < insn_cnt; i++) {
786 if (insn[i].code != (BPF_JMP | BPF_CALL))
788 if (insn[i].src_reg != BPF_PSEUDO_CALL)
790 if (!env->allow_ptr_leaks) {
791 verbose(env, "function calls to other bpf functions are allowed for root only\n");
794 if (bpf_prog_is_dev_bound(env->prog->aux)) {
795 verbose(env, "function calls in offloaded programs are not supported yet\n");
798 ret = add_subprog(env, i + insn[i].imm + 1);
803 if (env->log.level > 1)
804 for (i = 0; i < env->subprog_cnt; i++)
805 verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
807 /* now check that all jumps are within the same subprog */
809 if (env->subprog_cnt == cur_subprog)
810 subprog_end = insn_cnt;
812 subprog_end = env->subprog_starts[cur_subprog++];
813 for (i = 0; i < insn_cnt; i++) {
814 u8 code = insn[i].code;
816 if (BPF_CLASS(code) != BPF_JMP)
818 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
820 off = i + insn[i].off + 1;
821 if (off < subprog_start || off >= subprog_end) {
822 verbose(env, "jump out of range from insn %d to %d\n", i, off);
826 if (i == subprog_end - 1) {
827 /* to avoid fall-through from one subprog into another
828 * the last insn of the subprog should be either exit
829 * or unconditional jump back
831 if (code != (BPF_JMP | BPF_EXIT) &&
832 code != (BPF_JMP | BPF_JA)) {
833 verbose(env, "last insn is not an exit or jmp\n");
836 subprog_start = subprog_end;
837 if (env->subprog_cnt == cur_subprog)
838 subprog_end = insn_cnt;
840 subprog_end = env->subprog_starts[cur_subprog++];
847 struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
848 const struct bpf_verifier_state *state,
849 struct bpf_verifier_state *parent,
852 struct bpf_verifier_state *tmp = NULL;
854 /* 'parent' could be a state of caller and
855 * 'state' could be a state of callee. In such case
856 * parent->curframe < state->curframe
857 * and it's ok for r1 - r5 registers
859 * 'parent' could be a callee's state after it bpf_exit-ed.
860 * In such case parent->curframe > state->curframe
861 * and it's ok for r0 only
863 if (parent->curframe == state->curframe ||
864 (parent->curframe < state->curframe &&
865 regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
866 (parent->curframe > state->curframe &&
870 if (parent->curframe > state->curframe &&
871 regno >= BPF_REG_6) {
872 /* for callee saved regs we have to skip the whole chain
873 * of states that belong to callee and mark as LIVE_READ
874 * the registers before the call
877 while (tmp && tmp->curframe != state->curframe) {
888 verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
889 verbose(env, "regno %d parent frame %d current frame %d\n",
890 regno, parent->curframe, state->curframe);
894 static int mark_reg_read(struct bpf_verifier_env *env,
895 const struct bpf_verifier_state *state,
896 struct bpf_verifier_state *parent,
899 bool writes = parent == state->parent; /* Observe write marks */
901 if (regno == BPF_REG_FP)
902 /* We don't need to worry about FP liveness because it's read-only */
906 /* if read wasn't screened by an earlier write ... */
907 if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
909 parent = skip_callee(env, state, parent, regno);
912 /* ... then we depend on parent's value */
913 parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
915 parent = state->parent;
921 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
924 struct bpf_verifier_state *vstate = env->cur_state;
925 struct bpf_func_state *state = vstate->frame[vstate->curframe];
926 struct bpf_reg_state *regs = state->regs;
928 if (regno >= MAX_BPF_REG) {
929 verbose(env, "R%d is invalid\n", regno);
934 /* check whether register used as source operand can be read */
935 if (regs[regno].type == NOT_INIT) {
936 verbose(env, "R%d !read_ok\n", regno);
939 return mark_reg_read(env, vstate, vstate->parent, regno);
941 /* check whether register used as dest operand can be written to */
942 if (regno == BPF_REG_FP) {
943 verbose(env, "frame pointer is read only\n");
946 regs[regno].live |= REG_LIVE_WRITTEN;
948 mark_reg_unknown(env, regs, regno);
953 static bool is_spillable_regtype(enum bpf_reg_type type)
956 case PTR_TO_MAP_VALUE:
957 case PTR_TO_MAP_VALUE_OR_NULL:
961 case PTR_TO_PACKET_META:
962 case PTR_TO_PACKET_END:
963 case CONST_PTR_TO_MAP:
970 /* Does this register contain a constant zero? */
971 static bool register_is_null(struct bpf_reg_state *reg)
973 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
/* check_stack_read/write functions track spill/fill of registers;
 * stack boundary and alignment are checked in check_mem_access()
979 static int check_stack_write(struct bpf_verifier_env *env,
980 struct bpf_func_state *state, /* func where register points to */
981 int off, int size, int value_regno)
983 struct bpf_func_state *cur; /* state of the current function */
984 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
985 enum bpf_reg_type type;
987 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
991 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
992 * so it's aligned access and [off, off + size) are within stack limits
994 if (!env->allow_ptr_leaks &&
995 state->stack[spi].slot_type[0] == STACK_SPILL &&
996 size != BPF_REG_SIZE) {
997 verbose(env, "attempt to corrupt spilled pointer on stack\n");
1001 cur = env->cur_state->frame[env->cur_state->curframe];
1002 if (value_regno >= 0 &&
1003 is_spillable_regtype((type = cur->regs[value_regno].type))) {
1005 /* register containing pointer is being spilled into stack */
1006 if (size != BPF_REG_SIZE) {
1007 verbose(env, "invalid size of register spill\n");
1011 if (state != cur && type == PTR_TO_STACK) {
1012 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1016 /* save register state */
1017 state->stack[spi].spilled_ptr = cur->regs[value_regno];
1018 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1020 for (i = 0; i < BPF_REG_SIZE; i++)
1021 state->stack[spi].slot_type[i] = STACK_SPILL;
1023 u8 type = STACK_MISC;
1025 /* regular write of data into stack */
1026 state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
1028 /* only mark the slot as written if all 8 bytes were written
1029 * otherwise read propagation may incorrectly stop too soon
1030 * when stack slots are partially written.
 * This heuristic means that read propagation will be
 * conservative, since it will add reg_live_read marks
 * to stack slots all the way to the first state when a program
 * writes and then reads less than 8 bytes
1036 if (size == BPF_REG_SIZE)
1037 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1039 /* when we zero initialize stack slots mark them as such */
1040 if (value_regno >= 0 &&
1041 register_is_null(&cur->regs[value_regno]))
1044 for (i = 0; i < size; i++)
1045 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
1051 /* registers of every function are unique and mark_reg_read() propagates
1052 * the liveness in the following cases:
1053 * - from callee into caller for R1 - R5 that were used as arguments
 * - from caller into callee for R0 that is used as the result of the call
1055 * - from caller to the same caller skipping states of the callee for R6 - R9,
1056 * since R6 - R9 are callee saved by implicit function prologue and
1057 * caller's R6 != callee's R6, so when we propagate liveness up to
1058 * parent states we need to skip callee states for R6 - R9.
1060 * stack slot marking is different, since stacks of caller and callee are
1061 * accessible in both (since caller can pass a pointer to caller's stack to
1062 * callee which can pass it to another function), hence mark_stack_slot_read()
1063 * has to propagate the stack liveness to all parent states at given frame number.
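 * For example (sketch):
 *    f1() {
 *        ptr = fp - 8;
 *        *ptr = ctx;
 *        call f2 {
 *            .. = *ptr;
 *        }
 *        .. = *ptr;
 *    }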
1073 * First *ptr is reading from f1's stack and mark_stack_slot_read() has
1074 * to mark liveness at the f1's frame and not f2's frame.
1075 * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has
1076 * to propagate liveness to f2 states at f1's frame level and further into
1077 * f1 states at f1's frame level until write into that stack slot
1079 static void mark_stack_slot_read(struct bpf_verifier_env *env,
1080 const struct bpf_verifier_state *state,
1081 struct bpf_verifier_state *parent,
1082 int slot, int frameno)
1084 bool writes = parent == state->parent; /* Observe write marks */
1087 if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
/* since the LIVE_WRITTEN mark is only done for a full 8-byte
 * write, the read marks are conservative and the parent
 * state may not even have the stack allocated. In such a case
 * end the propagation, since the loop has reached the beginning
1095 /* if read wasn't screened by an earlier write ... */
1096 if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
1098 /* ... then we depend on parent's value */
1099 parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
1101 parent = state->parent;
1106 static int check_stack_read(struct bpf_verifier_env *env,
1107 struct bpf_func_state *reg_state /* func where register points to */,
1108 int off, int size, int value_regno)
1110 struct bpf_verifier_state *vstate = env->cur_state;
1111 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1112 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1115 if (reg_state->allocated_stack <= slot) {
1116 verbose(env, "invalid read from stack off %d+0 size %d\n",
1120 stype = reg_state->stack[spi].slot_type;
1122 if (stype[0] == STACK_SPILL) {
1123 if (size != BPF_REG_SIZE) {
1124 verbose(env, "invalid size of register spill\n");
1127 for (i = 1; i < BPF_REG_SIZE; i++) {
1128 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
1129 verbose(env, "corrupted spill memory\n");
1134 if (value_regno >= 0) {
1135 /* restore register state from stack */
1136 state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
1137 /* mark reg as written since spilled pointer state likely
1138 * has its liveness marks cleared by is_state_visited()
1139 * which resets stack/reg liveness for state transitions
1141 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1143 mark_stack_slot_read(env, vstate, vstate->parent, spi,
1144 reg_state->frameno);
1149 for (i = 0; i < size; i++) {
1150 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
1152 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1156 verbose(env, "invalid read from stack off %d+%d size %d\n",
1160 mark_stack_slot_read(env, vstate, vstate->parent, spi,
1161 reg_state->frameno);
1162 if (value_regno >= 0) {
1163 if (zeros == size) {
1164 /* any size read into register is zero extended,
1165 * so the whole register == const_zero
1167 __mark_reg_const_zero(&state->regs[value_regno]);
1169 /* have read misc data from the stack */
1170 mark_reg_unknown(env, state->regs, value_regno);
1172 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1178 /* check read/write into map element returned by bpf_map_lookup_elem() */
1179 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
1180 int size, bool zero_size_allowed)
1182 struct bpf_reg_state *regs = cur_regs(env);
1183 struct bpf_map *map = regs[regno].map_ptr;
1185 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1186 off + size > map->value_size) {
1187 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
1188 map->value_size, off, size);
1194 /* check read/write into a map element with possible variable offset */
1195 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1196 int off, int size, bool zero_size_allowed)
1198 struct bpf_verifier_state *vstate = env->cur_state;
1199 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1200 struct bpf_reg_state *reg = &state->regs[regno];
1203 /* We may have adjusted the register to this map value, so we
1204 * need to try adding each of min_value and max_value to off
1205 * to make sure our theoretical access will be safe.
1208 print_verifier_state(env, state);
1209 /* The minimum value is only important with signed
1210 * comparisons where we can't assume the floor of a
1211 * value is 0. If we are using signed variables for our
 * indices we need to make sure that whatever we use
1213 * will have a set floor within our range.
1215 if (reg->smin_value < 0) {
1216 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1220 err = __check_map_access(env, regno, reg->smin_value + off, size,
1223 verbose(env, "R%d min value is outside of the array range\n",
1228 /* If we haven't set a max value then we need to bail since we can't be
1229 * sure we won't do bad things.
1230 * If reg->umax_value + off could overflow, treat that as unbounded too.
1232 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
1233 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
1237 err = __check_map_access(env, regno, reg->umax_value + off, size,
1240 verbose(env, "R%d max value is outside of the array range\n",
1245 #define MAX_PACKET_OFF 0xffff
1247 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
1248 const struct bpf_call_arg_meta *meta,
1249 enum bpf_access_type t)
1251 switch (env->prog->type) {
1252 case BPF_PROG_TYPE_LWT_IN:
1253 case BPF_PROG_TYPE_LWT_OUT:
1254 /* dst_input() and dst_output() can't write for now */
1258 case BPF_PROG_TYPE_SCHED_CLS:
1259 case BPF_PROG_TYPE_SCHED_ACT:
1260 case BPF_PROG_TYPE_XDP:
1261 case BPF_PROG_TYPE_LWT_XMIT:
1262 case BPF_PROG_TYPE_SK_SKB:
1263 case BPF_PROG_TYPE_SK_MSG:
1265 return meta->pkt_access;
1267 env->seen_direct_write = true;
1274 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
1275 int off, int size, bool zero_size_allowed)
1277 struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = &regs[regno];
1280 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1281 (u64)off + size > reg->range) {
1282 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
1283 off, size, regno, reg->id, reg->off, reg->range);
1289 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
1290 int size, bool zero_size_allowed)
1292 struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = &regs[regno];
1296 /* We may have added a variable offset to the packet pointer; but any
1297 * reg->range we have comes after that. We are only checking the fixed
1301 /* We don't allow negative numbers, because we aren't tracking enough
1302 * detail to prove they're safe.
1304 if (reg->smin_value < 0) {
1305 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1309 err = __check_packet_access(env, regno, off, size, zero_size_allowed);
1311 verbose(env, "R%d offset is outside of the packet\n", regno);
1317 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
1318 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
1319 enum bpf_access_type t, enum bpf_reg_type *reg_type)
1321 struct bpf_insn_access_aux info = {
1322 .reg_type = *reg_type,
1325 if (env->ops->is_valid_access &&
1326 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
1327 /* A non zero info.ctx_field_size indicates that this field is a
1328 * candidate for later verifier transformation to load the whole
1329 * field and then apply a mask when accessed with a narrower
1330 * access than actual ctx access size. A zero info.ctx_field_size
1331 * will only allow for whole field access and rejects any other
1332 * type of narrower access.
1334 *reg_type = info.reg_type;
1336 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
1337 /* remember the offset of last byte accessed in ctx */
1338 if (env->prog->aux->max_ctx_offset < off + size)
1339 env->prog->aux->max_ctx_offset = off + size;
1343 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
1347 static bool __is_pointer_value(bool allow_ptr_leaks,
1348 const struct bpf_reg_state *reg)
1350 if (allow_ptr_leaks)
1353 return reg->type != SCALAR_VALUE;
1356 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1358 return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
1361 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1363 const struct bpf_reg_state *reg = cur_regs(env) + regno;
1365 return reg->type == PTR_TO_CTX;
1368 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1370 const struct bpf_reg_state *reg = cur_regs(env) + regno;
1372 return type_is_pkt_pointer(reg->type);
1375 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1376 const struct bpf_reg_state *reg,
1377 int off, int size, bool strict)
1379 struct tnum reg_off;
1382 /* Byte size accesses are always allowed. */
1383 if (!strict || size == 1)
1386 /* For platforms that do not have a Kconfig enabling
1387 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1388 * NET_IP_ALIGN is universally set to '2'. And on platforms
1389 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1390 * to this code only in strict mode where we want to emulate
1391 * the NET_IP_ALIGN==2 checking. Therefore use an
1392 * unconditional IP align value of '2'.
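 * For example, assuming the pointer's variable offset is known to be zero
 * and reg->off + off == 14 (just past the Ethernet header), a 4-byte load
 * passes this check because 2 + 14 = 16 is a multiple of 4.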
1396 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1397 if (!tnum_is_aligned(reg_off, size)) {
1400 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1402 "misaligned packet access off %d+%s+%d+%d size %d\n",
1403 ip_align, tn_buf, reg->off, off, size);
1410 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1411 const struct bpf_reg_state *reg,
1412 const char *pointer_desc,
1413 int off, int size, bool strict)
1415 struct tnum reg_off;
1417 /* Byte size accesses are always allowed. */
1418 if (!strict || size == 1)
1421 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1422 if (!tnum_is_aligned(reg_off, size)) {
1425 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1426 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
1427 pointer_desc, tn_buf, reg->off, off, size);
1434 static int check_ptr_alignment(struct bpf_verifier_env *env,
1435 const struct bpf_reg_state *reg, int off,
1436 int size, bool strict_alignment_once)
1438 bool strict = env->strict_alignment || strict_alignment_once;
1439 const char *pointer_desc = "";
1441 switch (reg->type) {
1443 case PTR_TO_PACKET_META:
1444 /* Special case, because of NET_IP_ALIGN. Given metadata sits
1445 * right in front, treat it the very same way.
1447 return check_pkt_ptr_alignment(env, reg, off, size, strict);
1448 case PTR_TO_MAP_VALUE:
1449 pointer_desc = "value ";
1452 pointer_desc = "context ";
1455 pointer_desc = "stack ";
1456 /* The stack spill tracking logic in check_stack_write()
1457 * and check_stack_read() relies on stack accesses being
1465 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1469 static int update_stack_depth(struct bpf_verifier_env *env,
1470 const struct bpf_func_state *func,
1473 u16 stack = env->subprog_stack_depth[func->subprogno];
1478 /* update known max for given subprogram */
1479 env->subprog_stack_depth[func->subprogno] = -off;
1483 /* starting from main bpf function walk all instructions of the function
1484 * and recursively walk all callees that given function can call.
1485 * Ignore jump and exit insns.
1486 * Since recursion is prevented by check_cfg() this algorithm
1487 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
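 * For example (with MAX_BPF_STACK == 512): main needing 64 bytes of stack,
 * calling f1 needing 128 bytes, calling f2 needing 256 bytes gives a combined
 * depth of 448 and is accepted; one more 128-byte frame on that call path
 * would exceed the limit and be rejected.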
1489 static int check_max_stack_depth(struct bpf_verifier_env *env)
1491 int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
1492 struct bpf_insn *insn = env->prog->insnsi;
1493 int insn_cnt = env->prog->len;
1494 int ret_insn[MAX_CALL_FRAMES];
1495 int ret_prog[MAX_CALL_FRAMES];
/* round up to 32 bytes, since this is the granularity
1499 * of interpreter stack size
1501 depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
1502 if (depth > MAX_BPF_STACK) {
1503 verbose(env, "combined stack size of %d calls is %d. Too large\n",
1508 if (env->subprog_cnt == subprog)
1509 subprog_end = insn_cnt;
1511 subprog_end = env->subprog_starts[subprog];
1512 for (; i < subprog_end; i++) {
1513 if (insn[i].code != (BPF_JMP | BPF_CALL))
1515 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1517 /* remember insn and function to return to */
1518 ret_insn[frame] = i + 1;
1519 ret_prog[frame] = subprog;
1521 /* find the callee */
1522 i = i + insn[i].imm + 1;
1523 subprog = find_subprog(env, i);
1525 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1531 if (frame >= MAX_CALL_FRAMES) {
1532 WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1537 /* end of for() loop means the last insn of the 'subprog'
1538 * was reached. Doesn't matter whether it was JA or EXIT
1542 depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
1544 i = ret_insn[frame];
1545 subprog = ret_prog[frame];
1549 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
1550 static int get_callee_stack_depth(struct bpf_verifier_env *env,
1551 const struct bpf_insn *insn, int idx)
1553 int start = idx + insn->imm + 1, subprog;
1555 subprog = find_subprog(env, start);
1557 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1562 return env->subprog_stack_depth[subprog];
1566 /* truncate register to smaller size (in bytes)
1567 * must be called with size < BPF_REG_SIZE
1569 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1573 /* clear high bits in bit representation */
1574 reg->var_off = tnum_cast(reg->var_off, size);
1576 /* fix arithmetic bounds */
1577 mask = ((u64)1 << (size * 8)) - 1;
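/* e.g. size == 1 gives mask == 0xff and size == 4 gives mask == 0xffffffff;
 * if the existing bounds already agree on the discarded high bits they can
 * simply be masked, otherwise all we know is that the result is in [0, mask].
 */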
1578 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1579 reg->umin_value &= mask;
1580 reg->umax_value &= mask;
1582 reg->umin_value = 0;
1583 reg->umax_value = mask;
1585 reg->smin_value = reg->umin_value;
1586 reg->smax_value = reg->umax_value;
1589 /* check whether memory at (regno + off) is accessible for t = (read | write)
 * if t==write, value_regno is a register whose value is stored into memory
1591 * if t==read, value_regno is a register which will receive the value from memory
1592 * if t==write && value_regno==-1, some unknown value is stored into memory
1593 * if t==read && value_regno==-1, don't care what we read from memory
1595 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1596 int off, int bpf_size, enum bpf_access_type t,
1597 int value_regno, bool strict_alignment_once)
1599 struct bpf_reg_state *regs = cur_regs(env);
1600 struct bpf_reg_state *reg = regs + regno;
1601 struct bpf_func_state *state;
1604 size = bpf_size_to_bytes(bpf_size);
1608 /* alignment checks will add in reg->off themselves */
1609 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
1613 /* for access checks, reg->off is just part of off */
1616 if (reg->type == PTR_TO_MAP_VALUE) {
1617 if (t == BPF_WRITE && value_regno >= 0 &&
1618 is_pointer_value(env, value_regno)) {
1619 verbose(env, "R%d leaks addr into map\n", value_regno);
1623 err = check_map_access(env, regno, off, size, false);
1624 if (!err && t == BPF_READ && value_regno >= 0)
1625 mark_reg_unknown(env, regs, value_regno);
1627 } else if (reg->type == PTR_TO_CTX) {
1628 enum bpf_reg_type reg_type = SCALAR_VALUE;
1630 if (t == BPF_WRITE && value_regno >= 0 &&
1631 is_pointer_value(env, value_regno)) {
1632 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1635 /* ctx accesses must be at a fixed offset, so that we can
1636 * determine what type of data were returned.
1640 "dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
1641 regno, reg->off, off - reg->off);
1644 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1647 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1649 "variable ctx access var_off=%s off=%d size=%d",
err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
1654 if (!err && t == BPF_READ && value_regno >= 0) {
1655 /* ctx access returns either a scalar, or a
1656 * PTR_TO_PACKET[_META,_END]. In the latter
1657 * case, we know the offset is zero.
1659 if (reg_type == SCALAR_VALUE)
1660 mark_reg_unknown(env, regs, value_regno);
1662 mark_reg_known_zero(env, regs,
1664 regs[value_regno].id = 0;
1665 regs[value_regno].off = 0;
1666 regs[value_regno].range = 0;
1667 regs[value_regno].type = reg_type;
1670 } else if (reg->type == PTR_TO_STACK) {
1671 /* stack accesses must be at a fixed offset, so that we can
1672 * determine what type of data were returned.
1673 * See check_stack_read().
1675 if (!tnum_is_const(reg->var_off)) {
1678 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1679 verbose(env, "variable stack access var_off=%s off=%d size=%d",
1683 off += reg->var_off.value;
1684 if (off >= 0 || off < -MAX_BPF_STACK) {
1685 verbose(env, "invalid stack off=%d size=%d\n", off,
1690 state = func(env, reg);
1691 err = update_stack_depth(env, state, off);
1696 err = check_stack_write(env, state, off, size,
1699 err = check_stack_read(env, state, off, size,
1701 } else if (reg_is_pkt_pointer(reg)) {
1702 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
1703 verbose(env, "cannot write into packet\n");
1706 if (t == BPF_WRITE && value_regno >= 0 &&
1707 is_pointer_value(env, value_regno)) {
1708 verbose(env, "R%d leaks addr into packet\n",
1712 err = check_packet_access(env, regno, off, size, false);
1713 if (!err && t == BPF_READ && value_regno >= 0)
1714 mark_reg_unknown(env, regs, value_regno);
1716 verbose(env, "R%d invalid mem access '%s'\n", regno,
1717 reg_type_str[reg->type]);
1721 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1722 regs[value_regno].type == SCALAR_VALUE) {
1723 /* b/h/w load zero-extends, mark upper bits as known 0 */
1724 coerce_reg_to_size(®s[value_regno], size);
1729 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
1733 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
1735 verbose(env, "BPF_XADD uses reserved fields\n");
1739 /* check src1 operand */
1740 err = check_reg_arg(env, insn->src_reg, SRC_OP);
1744 /* check src2 operand */
1745 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
1749 if (is_pointer_value(env, insn->src_reg)) {
1750 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
1754 if (is_ctx_reg(env, insn->dst_reg) ||
1755 is_pkt_reg(env, insn->dst_reg)) {
1756 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
1757 insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
1758 "context" : "packet");
1762 /* check whether atomic_add can read the memory */
1763 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1764 BPF_SIZE(insn->code), BPF_READ, -1, true);
1768 /* check whether atomic_add can write into the same memory */
1769 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1770 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
1773 /* when register 'regno' is passed into function that will read 'access_size'
1774 * bytes from that pointer, make sure that it's within stack boundary
1775 * and all elements of stack are initialized.
1776 * Unlike most pointer bounds-checking functions, this one doesn't take an
1777 * 'off' argument, so it has to add in reg->off itself.
1779 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1780 int access_size, bool zero_size_allowed,
1781 struct bpf_call_arg_meta *meta)
1783 struct bpf_reg_state *reg = cur_regs(env) + regno;
1784 struct bpf_func_state *state = func(env, reg);
1785 int off, i, slot, spi;
1787 if (reg->type != PTR_TO_STACK) {
1788 /* Allow zero-byte read from NULL, regardless of pointer type */
1789 if (zero_size_allowed && access_size == 0 &&
1790 register_is_null(reg))
1793 verbose(env, "R%d type=%s expected=%s\n", regno,
1794 reg_type_str[reg->type],
1795 reg_type_str[PTR_TO_STACK]);
1799 /* Only allow fixed-offset stack reads */
1800 if (!tnum_is_const(reg->var_off)) {
1803 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1804 verbose(env, "invalid variable stack read R%d var_off=%s\n",
1808 off = reg->off + reg->var_off.value;
1809 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
1810 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
1811 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
1812 regno, off, access_size);
1816 if (meta && meta->raw_mode) {
1817 meta->access_size = access_size;
1818 meta->regno = regno;
1822 for (i = 0; i < access_size; i++) {
1825 slot = -(off + i) - 1;
1826 spi = slot / BPF_REG_SIZE;
1827 if (state->allocated_stack <= slot)
1829 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
1830 if (*stype == STACK_MISC)
1832 if (*stype == STACK_ZERO) {
1833 /* helper can write anything into the stack */
1834 *stype = STACK_MISC;
1838 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
1839 off, i, access_size);
1842 /* reading any byte out of 8-byte 'spill_slot' will cause
1843 * the whole slot to be marked as 'read'
1845 mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
1846 spi, state->frameno);
1848 return update_stack_depth(env, state, off);
1851 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
1852 int access_size, bool zero_size_allowed,
1853 struct bpf_call_arg_meta *meta)
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1857 switch (reg->type) {
1859 case PTR_TO_PACKET_META:
1860 return check_packet_access(env, regno, reg->off, access_size,
1862 case PTR_TO_MAP_VALUE:
1863 return check_map_access(env, regno, reg->off, access_size,
1865 default: /* scalar_value|ptr_to_stack or invalid ptr */
1866 return check_stack_boundary(env, regno, access_size,
1867 zero_size_allowed, meta);
1871 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
1873 return type == ARG_PTR_TO_MEM ||
1874 type == ARG_PTR_TO_MEM_OR_NULL ||
1875 type == ARG_PTR_TO_UNINIT_MEM;
1878 static bool arg_type_is_mem_size(enum bpf_arg_type type)
1880 return type == ARG_CONST_SIZE ||
1881 type == ARG_CONST_SIZE_OR_ZERO;
1884 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
1885 enum bpf_arg_type arg_type,
1886 struct bpf_call_arg_meta *meta)
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1889 enum bpf_reg_type expected_type, type = reg->type;
1892 if (arg_type == ARG_DONTCARE)
1895 err = check_reg_arg(env, regno, SRC_OP);
1899 if (arg_type == ARG_ANYTHING) {
1900 if (is_pointer_value(env, regno)) {
1901 verbose(env, "R%d leaks addr into helper function\n",
1908 if (type_is_pkt_pointer(type) &&
1909 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
1910 verbose(env, "helper access to the packet is not allowed\n");
1914 if (arg_type == ARG_PTR_TO_MAP_KEY ||
1915 arg_type == ARG_PTR_TO_MAP_VALUE) {
1916 expected_type = PTR_TO_STACK;
1917 if (!type_is_pkt_pointer(type) &&
1918 type != expected_type)
1920 } else if (arg_type == ARG_CONST_SIZE ||
1921 arg_type == ARG_CONST_SIZE_OR_ZERO) {
1922 expected_type = SCALAR_VALUE;
1923 if (type != expected_type)
1925 } else if (arg_type == ARG_CONST_MAP_PTR) {
1926 expected_type = CONST_PTR_TO_MAP;
1927 if (type != expected_type)
1929 } else if (arg_type == ARG_PTR_TO_CTX) {
1930 expected_type = PTR_TO_CTX;
1931 if (type != expected_type)
1933 } else if (arg_type_is_mem_ptr(arg_type)) {
1934 expected_type = PTR_TO_STACK;
1935 /* One exception here. In case function allows for NULL to be
1936 * passed in as argument, it's a SCALAR_VALUE type. Final test
1937 * happens during stack boundary checking.
1939 if (register_is_null(reg) &&
1940 arg_type == ARG_PTR_TO_MEM_OR_NULL)
1941 /* final test in check_stack_boundary() */;
1942 else if (!type_is_pkt_pointer(type) &&
1943 type != PTR_TO_MAP_VALUE &&
1944 type != expected_type)
1946 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
1948 verbose(env, "unsupported arg_type %d\n", arg_type);
1952 if (arg_type == ARG_CONST_MAP_PTR) {
1953 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
1954 meta->map_ptr = reg->map_ptr;
1955 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
1956 /* bpf_map_xxx(..., map_ptr, ..., key) call:
1957 * check that [key, key + map->key_size) are within
1958 * stack limits and initialized
1960 if (!meta->map_ptr) {
 * in the function declaration map_ptr must come before
 * map_key, so that it's verified and known before
 * we have to check map_key here. Otherwise it means
 * that the kernel subsystem misconfigured the verifier
1966 verbose(env, "invalid map_ptr to access map->key\n");
1969 if (type_is_pkt_pointer(type))
1970 err = check_packet_access(env, regno, reg->off,
1971 meta->map_ptr->key_size,
1974 err = check_stack_boundary(env, regno,
1975 meta->map_ptr->key_size,
1977 } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
1978 /* bpf_map_xxx(..., map_ptr, ..., value) call:
1979 * check [value, value + map->value_size) validity
1981 if (!meta->map_ptr) {
1982 /* kernel subsystem misconfigured verifier */
1983 verbose(env, "invalid map_ptr to access map->value\n");
1986 if (type_is_pkt_pointer(type))
1987 err = check_packet_access(env, regno, reg->off,
1988 meta->map_ptr->value_size,
1991 err = check_stack_boundary(env, regno,
1992 meta->map_ptr->value_size,
1994 } else if (arg_type_is_mem_size(arg_type)) {
1995 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
1997 /* The register is SCALAR_VALUE; the access check
1998 * happens using its boundaries.
2000 if (!tnum_is_const(reg->var_off))
2001 /* For unprivileged variable accesses, disable raw
2002 * mode so that the program is required to
2003 * initialize all the memory that the helper could
2004 * just partially fill up.
2008 if (reg->smin_value < 0) {
2009 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
2014 if (reg->umin_value == 0) {
2015 err = check_helper_mem_access(env, regno - 1, 0,
2022 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
2023 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
2027 err = check_helper_mem_access(env, regno - 1,
2029 zero_size_allowed, meta);
2034 verbose(env, "R%d type=%s expected=%s\n", regno,
2035 reg_type_str[type], reg_type_str[expected_type]);
2039 static int check_map_func_compatibility(struct bpf_verifier_env *env,
2040 struct bpf_map *map, int func_id)
2045 /* We need a two way check, first is from map perspective ... */
2046 switch (map->map_type) {
2047 case BPF_MAP_TYPE_PROG_ARRAY:
2048 if (func_id != BPF_FUNC_tail_call)
2051 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2052 if (func_id != BPF_FUNC_perf_event_read &&
2053 func_id != BPF_FUNC_perf_event_output &&
2054 func_id != BPF_FUNC_perf_event_read_value)
2057 case BPF_MAP_TYPE_STACK_TRACE:
2058 if (func_id != BPF_FUNC_get_stackid)
2061 case BPF_MAP_TYPE_CGROUP_ARRAY:
2062 if (func_id != BPF_FUNC_skb_under_cgroup &&
2063 func_id != BPF_FUNC_current_task_under_cgroup)
2066 /* devmap returns a pointer to a live net_device ifindex that we cannot
2067 * allow to be modified from bpf side. So do not allow lookup elements
2070 case BPF_MAP_TYPE_DEVMAP:
2071 if (func_id != BPF_FUNC_redirect_map)
2074 /* Restrict bpf side of cpumap, open when use-cases appear */
2075 case BPF_MAP_TYPE_CPUMAP:
2076 if (func_id != BPF_FUNC_redirect_map)
2079 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
2080 case BPF_MAP_TYPE_HASH_OF_MAPS:
2081 if (func_id != BPF_FUNC_map_lookup_elem)
2084 case BPF_MAP_TYPE_SOCKMAP:
2085 if (func_id != BPF_FUNC_sk_redirect_map &&
2086 func_id != BPF_FUNC_sock_map_update &&
2087 func_id != BPF_FUNC_map_delete_elem &&
2088 func_id != BPF_FUNC_msg_redirect_map)
2095 /* ... and second from the function itself. */
2097 case BPF_FUNC_tail_call:
2098 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2100 if (env->subprog_cnt) {
2101 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2105 case BPF_FUNC_perf_event_read:
2106 case BPF_FUNC_perf_event_output:
2107 case BPF_FUNC_perf_event_read_value:
2108 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2111 case BPF_FUNC_get_stackid:
2112 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2115 case BPF_FUNC_current_task_under_cgroup:
2116 case BPF_FUNC_skb_under_cgroup:
2117 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2120 case BPF_FUNC_redirect_map:
2121 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
2122 map->map_type != BPF_MAP_TYPE_CPUMAP)
2125 case BPF_FUNC_sk_redirect_map:
2126 case BPF_FUNC_msg_redirect_map:
2127 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2130 case BPF_FUNC_sock_map_update:
2131 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2140 verbose(env, "cannot pass map_type %d into func %s#%d\n",
2141 map->map_type, func_id_name(func_id), func_id);
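/*
 * Illustrative sketch (not used by the verifier): a call site that satisfies
 * both directions of check_map_func_compatibility().  The map is assumed to
 * be a BPF_MAP_TYPE_PROG_ARRAY (hypothetical fd 0) and the helper is
 * BPF_FUNC_tail_call; pairing either one with anything else is rejected
 * above, and the check also rejects tail calls outright in programs that use
 * bpf-to-bpf calls.  r1 is assumed to still hold the program's context.
 */
static const struct bpf_insn example_tail_call_site[] __maybe_unused = {
	BPF_LD_MAP_FD(BPF_REG_2, 0),		/* r2 = prog array (hypothetical fd) */
	BPF_MOV64_IMM(BPF_REG_3, 0),		/* r3 = index into the prog array */
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
	BPF_MOV64_IMM(BPF_REG_0, 0),		/* only reached if the tail call fails */
	BPF_EXIT_INSN(),
};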
2145 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
2149 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
2151 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
2153 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
2155 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
2157 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
2160 /* We only support one arg being in raw mode at the moment,
2161 * which is sufficient for the helper functions we have
2167 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
2168 enum bpf_arg_type arg_next)
2170 return (arg_type_is_mem_ptr(arg_curr) &&
2171 !arg_type_is_mem_size(arg_next)) ||
2172 (!arg_type_is_mem_ptr(arg_curr) &&
2173 arg_type_is_mem_size(arg_next));
2176 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2178 /* bpf_xxx(..., buf, len) call will access 'len'
2179 * bytes from memory 'buf'. Both arg types need
2180 * to be paired, so make sure there's no buggy
2181 * helper function specification.
2183 if (arg_type_is_mem_size(fn->arg1_type) ||
2184 arg_type_is_mem_ptr(fn->arg5_type) ||
2185 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
2186 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
2187 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
2188 check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
2194 static int check_func_proto(const struct bpf_func_proto *fn)
2196 return check_raw_mode_ok(fn) &&
2197 check_arg_pair_ok(fn) ? 0 : -EINVAL;
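/*
 * Illustrative sketch: a hypothetical helper prototype of the shape that
 * check_func_proto() accepts.  The single ARG_PTR_TO_UNINIT_MEM argument
 * satisfies check_raw_mode_ok(), and it is immediately followed by its
 * ARG_CONST_SIZE, satisfying check_arg_pair_ok().  ".func = NULL" is a
 * placeholder; a real helper would point at its BPF_CALL_x() implementation.
 */
static const struct bpf_func_proto example_fill_buf_proto __maybe_unused = {
	.func		= NULL,			/* placeholder only */
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,	/* buf */
	.arg2_type	= ARG_CONST_SIZE,		/* len, must follow buf */
};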
2200 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2201 * are now invalid, so turn them into unknown SCALAR_VALUE.
2203 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2204 struct bpf_func_state *state)
2206 struct bpf_reg_state *regs = state->regs, *reg;
2209 for (i = 0; i < MAX_BPF_REG; i++)
2210 if (reg_is_pkt_pointer_any(&regs[i]))
2211 mark_reg_unknown(env, regs, i);
2213 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
2214 if (state->stack[i].slot_type[0] != STACK_SPILL)
2216 reg = &state->stack[i].spilled_ptr;
2217 if (reg_is_pkt_pointer_any(reg))
2218 __mark_reg_unknown(reg);
2222 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2224 struct bpf_verifier_state *vstate = env->cur_state;
2227 for (i = 0; i <= vstate->curframe; i++)
2228 __clear_all_pkt_pointers(env, vstate->frame[i]);
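/*
 * Illustrative sketch (XDP-flavoured, not used by the verifier): once a
 * helper that can change packet data has been called, packet pointers must be
 * reloaded from the context and re-checked against data_end before further
 * packet accesses.  Reusing a pre-call packet pointer instead would be
 * rejected, since clear_all_pkt_pointers() has turned it into an unknown
 * scalar.  The adjust amount (-8) is arbitrary for the example.
 */
static const struct bpf_insn example_pkt_reload_after_helper[] __maybe_unused = {
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),	/* keep ctx across the helper call */
	BPF_MOV64_IMM(BPF_REG_2, -8),		/* r2 = head adjustment */
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_xdp_adjust_head),
	/* reload and re-check packet pointers from the context */
	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, offsetof(struct xdp_md, data)),
	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_6, offsetof(struct xdp_md, data_end)),
	BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
	BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),	/* data + 8 > data_end? skip */
	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),	/* bounds-checked packet read */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};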
2231 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2234 struct bpf_verifier_state *state = env->cur_state;
2235 struct bpf_func_state *caller, *callee;
2236 int i, subprog, target_insn;
2238 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
2239 verbose(env, "the call stack of %d frames is too deep\n",
2240 state->curframe + 2);
2244 target_insn = *insn_idx + insn->imm;
2245 subprog = find_subprog(env, target_insn + 1);
2247 verbose(env, "verifier bug. No program starts at insn %d\n",
2252 caller = state->frame[state->curframe];
2253 if (state->frame[state->curframe + 1]) {
2254 verbose(env, "verifier bug. Frame %d already allocated\n",
2255 state->curframe + 1);
2259 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2262 state->frame[state->curframe + 1] = callee;
2264 /* callee cannot access r0, r6 - r9 for reading and has to write
2265 * into its own stack before reading from it.
2266 * callee can read/write into caller's stack
2268 init_func_state(env, callee,
2269 /* remember the callsite, it will be used by bpf_exit */
2270 *insn_idx /* callsite */,
2271 state->curframe + 1 /* frameno within this callchain */,
2272 subprog + 1 /* subprog number within this prog */);
2274 /* copy r1 - r5 args that callee can access */
2275 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2276 callee->regs[i] = caller->regs[i];
2278 /* after the call registers r0 - r5 were scratched */
2279 for (i = 0; i < CALLER_SAVED_REGS; i++) {
2280 mark_reg_not_init(env, caller->regs, caller_saved[i]);
2281 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2284 /* only increment it after check_reg_arg() finished */
2287 /* and go analyze first insn of the callee */
2288 *insn_idx = target_insn;
2290 if (env->log.level) {
2291 verbose(env, "caller:\n");
2292 print_verifier_state(env, caller);
2293 verbose(env, "callee:\n");
2294 print_verifier_state(env, callee);
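/*
 * Illustrative sketch (not used by the verifier): a minimal bpf-to-bpf call
 * as handled by check_func_call().  r1-r5 are copied into the callee's frame,
 * the caller's r0-r5 are clobbered by the call (r0 is then refilled from the
 * callee's return value in prepare_func_exit()), and r6-r9 stay with the
 * caller.  Per the target_insn computation above, the callee's first
 * instruction sits at call_index + imm + 1, so imm == 1 here skips the
 * caller's exit instruction.
 */
static const struct bpf_insn example_bpf_to_bpf_call[] __maybe_unused = {
	/* caller (frame 0) */
	BPF_MOV64_IMM(BPF_REG_1, 3),		/* argument passed in r1 */
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 0, 1),
	BPF_EXIT_INSN(),			/* returns the callee's r0 */
	/* callee (frame 1) */
	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),	/* r0 = r1 */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),	/* r0 = r1 + 1 */
	BPF_EXIT_INSN(),
};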
2299 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2301 struct bpf_verifier_state *state = env->cur_state;
2302 struct bpf_func_state *caller, *callee;
2303 struct bpf_reg_state *r0;
2305 callee = state->frame[state->curframe];
2306 r0 = &callee->regs[BPF_REG_0];
2307 if (r0->type == PTR_TO_STACK) {
2308 /* technically it's ok to return caller's stack pointer
2309 * (or caller's caller's pointer) back to the caller,
2310 * since these pointers are valid. Only current stack
2311 * pointer will be invalid as soon as function exits,
2312 * but let's be conservative
2314 verbose(env, "cannot return stack pointer to the caller\n");
2319 caller = state->frame[state->curframe];
2320 /* return to the caller whatever r0 had in the callee */
2321 caller->regs[BPF_REG_0] = *r0;
2323 *insn_idx = callee->callsite + 1;
2324 if (env->log.level) {
2325 verbose(env, "returning from callee:\n");
2326 print_verifier_state(env, callee);
2327 verbose(env, "to caller at %d:\n", *insn_idx);
2328 print_verifier_state(env, caller);
2330 /* clear everything in the callee */
2331 free_func_state(callee);
2332 state->frame[state->curframe + 1] = NULL;
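/*
 * Illustrative sketch: a callee body that the check above rejects, because r0
 * still has PTR_TO_STACK type when BPF_EXIT is reached ("cannot return stack
 * pointer to the caller").  Returning a scalar or a map value pointer instead
 * is fine.
 */
static const struct bpf_insn example_rejected_stack_return[] __maybe_unused = {
	BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),	/* r0 = frame pointer */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8),	/* r0 = fp - 8, PTR_TO_STACK */
	BPF_EXIT_INSN(),			/* rejected in prepare_func_exit() */
};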
2336 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
2338 const struct bpf_func_proto *fn = NULL;
2339 struct bpf_reg_state *regs;
2340 struct bpf_call_arg_meta meta;
2344 /* find function prototype */
2345 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
2346 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
2351 if (env->ops->get_func_proto)
2352 fn = env->ops->get_func_proto(func_id, env->prog);
2354 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
2359 /* eBPF programs must be GPL compatible to use GPL-ed functions */
2360 if (!env->prog->gpl_compatible && fn->gpl_only) {
2361 verbose(env, "cannot call GPL only function from proprietary program\n");
2365 /* With LD_ABS/IND some JITs save/restore skb from r1. */
2366 changes_data = bpf_helper_changes_pkt_data(fn->func);
2367 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
2368 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
2369 func_id_name(func_id), func_id);
2373 memset(&meta, 0, sizeof(meta));
2374 meta.pkt_access = fn->pkt_access;
2376 err = check_func_proto(fn);
2378 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
2379 func_id_name(func_id), func_id);
2384 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
2387 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
2390 if (func_id == BPF_FUNC_tail_call) {
2391 if (meta.map_ptr == NULL) {
2392 verbose(env, "verifier bug\n");
2395 env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
2397 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
2400 err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
2403 err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
2407 /* Mark slots with STACK_MISC in case of raw mode, stack offset
2408 * is inferred from register state.
2410 for (i = 0; i < meta.access_size; i++) {
2411 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
2412 BPF_WRITE, -1, false);
2417 regs = cur_regs(env);
2418 /* reset caller saved regs */
2419 for (i = 0; i < CALLER_SAVED_REGS; i++) {
2420 mark_reg_not_init(env, regs, caller_saved[i]);
2421 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2424 /* update return register (already marked as written above) */
2425 if (fn->ret_type == RET_INTEGER) {
2426 /* sets type to SCALAR_VALUE */
2427 mark_reg_unknown(env, regs, BPF_REG_0);
2428 } else if (fn->ret_type == RET_VOID) {
2429 regs[BPF_REG_0].type = NOT_INIT;
2430 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
2431 struct bpf_insn_aux_data *insn_aux;
2433 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
2434 /* There is no offset yet applied, variable or fixed */
2435 mark_reg_known_zero(env, regs, BPF_REG_0);
2436 regs[BPF_REG_0].off = 0;
2437 /* remember map_ptr, so that check_map_access()
2438 * can check 'value_size' boundary of memory access
2439 * to map element returned from bpf_map_lookup_elem()
2441 if (meta.map_ptr == NULL) {
2443 "kernel subsystem misconfigured verifier\n");
2446 regs[BPF_REG_0].map_ptr = meta.map_ptr;
2447 regs[BPF_REG_0].id = ++env->id_gen;
2448 insn_aux = &env->insn_aux_data[insn_idx];
2449 if (!insn_aux->map_ptr)
2450 insn_aux->map_ptr = meta.map_ptr;
2451 else if (insn_aux->map_ptr != meta.map_ptr)
2452 insn_aux->map_ptr = BPF_MAP_PTR_POISON;
2454 verbose(env, "unknown return type %d of func %s#%d\n",
2455 fn->ret_type, func_id_name(func_id), func_id);
2459 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
2464 clear_all_pkt_pointers(env);
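/*
 * Illustrative sketch (assuming a program type that exposes
 * bpf_get_current_comm(), whose first two arguments are declared
 * ARG_PTR_TO_UNINIT_MEM and ARG_CONST_SIZE): because the buffer argument is
 * in "raw mode", the stack bytes do not need to be pre-initialized; the loop
 * above marks them via check_mem_access() instead.  The 16-byte size is an
 * arbitrary choice for the example.
 */
static const struct bpf_insn example_uninit_mem_and_size[] __maybe_unused = {
	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),	/* r1 = fp */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -16),	/* r1 = fp - 16, uninit buffer */
	BPF_MOV64_IMM(BPF_REG_2, 16),		/* r2 = buffer size */
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_comm),
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};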
2468 static bool signed_add_overflows(s64 a, s64 b)
2470 /* Do the add in u64, where overflow is well-defined */
2471 s64 res = (s64)((u64)a + (u64)b);
2478 static bool signed_sub_overflows(s64 a, s64 b)
2480 /* Do the sub in u64, where overflow is well-defined */
2481 s64 res = (s64)((u64)a - (u64)b);
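/*
 * Illustrative sketch of how overflow tests of this shape are typically
 * completed (a sketch, not necessarily the exact elided lines): the operation
 * is done in u64, where wraparound is well-defined, and the result is
 * compared against one operand.  For addition: adding a negative value must
 * not make the result larger, adding a non-negative value must not make it
 * smaller.
 */
static inline bool example_signed_add_overflows(s64 a, s64 b)
{
	s64 res = (s64)((u64)a + (u64)b);	/* well-defined wraparound */

	if (b < 0)
		return res > a;
	return res < a;
}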
2488 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
2489 const struct bpf_reg_state *reg,
2490 enum bpf_reg_type type)
2492 bool known = tnum_is_const(reg->var_off);
2493 s64 val = reg->var_off.value;
2494 s64 smin = reg->smin_value;
2496 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
2497 verbose(env, "math between %s pointer and %lld is not allowed\n",
2498 reg_type_str[type], val);
2502 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
2503 verbose(env, "%s pointer offset %d is not allowed\n",
2504 reg_type_str[type], reg->off);
2508 if (smin == S64_MIN) {
2509 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
2510 reg_type_str[type]);
2514 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
2515 verbose(env, "value %lld makes %s pointer be out of bounds\n",
2516 smin, reg_type_str[type]);
2523 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
2524 * Caller should also handle BPF_MOV case separately.
2525 * If we return -EACCES, caller may want to try again treating pointer as a
2526 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
2528 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2529 struct bpf_insn *insn,
2530 const struct bpf_reg_state *ptr_reg,
2531 const struct bpf_reg_state *off_reg)
2533 struct bpf_verifier_state *vstate = env->cur_state;
2534 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2535 struct bpf_reg_state *regs = state->regs, *dst_reg;
2536 bool known = tnum_is_const(off_reg->var_off);
2537 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
2538 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
2539 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
2540 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
2541 u8 opcode = BPF_OP(insn->code);
2542 u32 dst = insn->dst_reg;
2544 dst_reg = &regs[dst];
2546 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
2547 smin_val > smax_val || umin_val > umax_val) {
2548 /* Taint dst register if offset had invalid bounds derived from
2549 * e.g. dead branches.
2551 __mark_reg_unknown(dst_reg);
2555 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2556 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
2558 "R%d 32-bit pointer arithmetic prohibited\n",
2563 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
2564 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
2568 if (ptr_reg->type == CONST_PTR_TO_MAP) {
2569 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
2573 if (ptr_reg->type == PTR_TO_PACKET_END) {
2574 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
2579 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
2580 * The id may be overwritten later if we create a new variable offset.
2582 dst_reg->type = ptr_reg->type;
2583 dst_reg->id = ptr_reg->id;
2585 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
2586 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
2591 /* We can take a fixed offset as long as it doesn't overflow
2592 * the s32 'off' field
2594 if (known && (ptr_reg->off + smin_val ==
2595 (s64)(s32)(ptr_reg->off + smin_val))) {
2596 /* pointer += K. Accumulate it into fixed offset */
2597 dst_reg->smin_value = smin_ptr;
2598 dst_reg->smax_value = smax_ptr;
2599 dst_reg->umin_value = umin_ptr;
2600 dst_reg->umax_value = umax_ptr;
2601 dst_reg->var_off = ptr_reg->var_off;
2602 dst_reg->off = ptr_reg->off + smin_val;
2603 dst_reg->range = ptr_reg->range;
2606 /* A new variable offset is created. Note that off_reg->off
2607 * == 0, since it's a scalar.
2608 * dst_reg gets the pointer type and since some positive
2609 * integer value was added to the pointer, give it a new 'id'
2610 * if it's a PTR_TO_PACKET.
2611 * this creates a new 'base' pointer, off_reg (variable) gets
2612 * added into the variable offset, and we copy the fixed offset
2615 if (signed_add_overflows(smin_ptr, smin_val) ||
2616 signed_add_overflows(smax_ptr, smax_val)) {
2617 dst_reg->smin_value = S64_MIN;
2618 dst_reg->smax_value = S64_MAX;
2620 dst_reg->smin_value = smin_ptr + smin_val;
2621 dst_reg->smax_value = smax_ptr + smax_val;
2623 if (umin_ptr + umin_val < umin_ptr ||
2624 umax_ptr + umax_val < umax_ptr) {
2625 dst_reg->umin_value = 0;
2626 dst_reg->umax_value = U64_MAX;
2628 dst_reg->umin_value = umin_ptr + umin_val;
2629 dst_reg->umax_value = umax_ptr + umax_val;
2631 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
2632 dst_reg->off = ptr_reg->off;
2633 if (reg_is_pkt_pointer(ptr_reg)) {
2634 dst_reg->id = ++env->id_gen;
2635 /* something was added to pkt_ptr, set range to zero */
2640 if (dst_reg == off_reg) {
2641 /* scalar -= pointer. Creates an unknown scalar */
2642 verbose(env, "R%d tried to subtract pointer from scalar\n",
2646 /* We don't allow subtraction from FP, because (according to
2647 * test_verifier.c test "invalid fp arithmetic", JITs might not
2648 * be able to deal with it.
2650 if (ptr_reg->type == PTR_TO_STACK) {
2651 verbose(env, "R%d subtraction from stack pointer prohibited\n",
2655 if (known && (ptr_reg->off - smin_val ==
2656 (s64)(s32)(ptr_reg->off - smin_val))) {
2657 /* pointer -= K. Subtract it from fixed offset */
2658 dst_reg->smin_value = smin_ptr;
2659 dst_reg->smax_value = smax_ptr;
2660 dst_reg->umin_value = umin_ptr;
2661 dst_reg->umax_value = umax_ptr;
2662 dst_reg->var_off = ptr_reg->var_off;
2663 dst_reg->id = ptr_reg->id;
2664 dst_reg->off = ptr_reg->off - smin_val;
2665 dst_reg->range = ptr_reg->range;
2668 /* A new variable offset is created. If the subtrahend is known
2669 * nonnegative, then any reg->range we had before is still good.
2671 if (signed_sub_overflows(smin_ptr, smax_val) ||
2672 signed_sub_overflows(smax_ptr, smin_val)) {
2673 /* Overflow possible, we know nothing */
2674 dst_reg->smin_value = S64_MIN;
2675 dst_reg->smax_value = S64_MAX;
2677 dst_reg->smin_value = smin_ptr - smax_val;
2678 dst_reg->smax_value = smax_ptr - smin_val;
2680 if (umin_ptr < umax_val) {
2681 /* Overflow possible, we know nothing */
2682 dst_reg->umin_value = 0;
2683 dst_reg->umax_value = U64_MAX;
2685 /* Cannot overflow (as long as bounds are consistent) */
2686 dst_reg->umin_value = umin_ptr - umax_val;
2687 dst_reg->umax_value = umax_ptr - umin_val;
2689 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
2690 dst_reg->off = ptr_reg->off;
2691 if (reg_is_pkt_pointer(ptr_reg)) {
2692 dst_reg->id = ++env->id_gen;
2693 /* something was subtracted from pkt_ptr, set range to zero */
2701 /* bitwise ops on pointers are troublesome, prohibit. */
2702 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
2703 dst, bpf_alu_string[opcode >> 4]);
2706 /* other operators (e.g. MUL,LSH) produce non-pointer results */
2707 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
2708 dst, bpf_alu_string[opcode >> 4]);
2712 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
2715 __update_reg_bounds(dst_reg);
2716 __reg_deduce_bounds(dst_reg);
2717 __reg_bound_offset(dst_reg);
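/*
 * Illustrative sketch (fragment): the pattern adjust_ptr_min_max_vals()
 * supports.  Assume r0 is a null-checked PTR_TO_MAP_VALUE whose value_size is
 * at least 8, and r2 is an untrusted scalar.  Masking r2 first gives it known
 * bounds [0, 7], so the pointer addition keeps a usable variable offset and
 * the byte load stays within the map value.  Without the BPF_AND, the
 * addition or the load would be rejected (unbounded or out-of-bounds offset).
 */
static const struct bpf_insn example_bounded_ptr_math[] __maybe_unused = {
	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 7),		/* r2 in [0, 7] */
	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),	/* map_value += bounded offset */
	BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 0),	/* in-bounds 1-byte read */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};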
2721 /* WARNING: This function does calculations on 64-bit values, but the actual
2722 * execution may occur on 32-bit values. Therefore, things like bitshifts
2723 * need extra checks in the 32-bit case.
2725 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2726 struct bpf_insn *insn,
2727 struct bpf_reg_state *dst_reg,
2728 struct bpf_reg_state src_reg)
2730 struct bpf_reg_state *regs = cur_regs(env);
2731 u8 opcode = BPF_OP(insn->code);
2732 bool src_known, dst_known;
2733 s64 smin_val, smax_val;
2734 u64 umin_val, umax_val;
2735 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
2737 smin_val = src_reg.smin_value;
2738 smax_val = src_reg.smax_value;
2739 umin_val = src_reg.umin_value;
2740 umax_val = src_reg.umax_value;
2741 src_known = tnum_is_const(src_reg.var_off);
2742 dst_known = tnum_is_const(dst_reg->var_off);
2744 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
2745 smin_val > smax_val || umin_val > umax_val) {
2746 /* Taint dst register if offset had invalid bounds derived from
2747 * e.g. dead branches.
2749 __mark_reg_unknown(dst_reg);
2754 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
2755 __mark_reg_unknown(dst_reg);
2761 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
2762 signed_add_overflows(dst_reg->smax_value, smax_val)) {
2763 dst_reg->smin_value = S64_MIN;
2764 dst_reg->smax_value = S64_MAX;
2766 dst_reg->smin_value += smin_val;
2767 dst_reg->smax_value += smax_val;
2769 if (dst_reg->umin_value + umin_val < umin_val ||
2770 dst_reg->umax_value + umax_val < umax_val) {
2771 dst_reg->umin_value = 0;
2772 dst_reg->umax_value = U64_MAX;
2774 dst_reg->umin_value += umin_val;
2775 dst_reg->umax_value += umax_val;
2777 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
2780 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
2781 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
2782 /* Overflow possible, we know nothing */
2783 dst_reg->smin_value = S64_MIN;
2784 dst_reg->smax_value = S64_MAX;
2786 dst_reg->smin_value -= smax_val;
2787 dst_reg->smax_value -= smin_val;
2789 if (dst_reg->umin_value < umax_val) {
2790 /* Overflow possible, we know nothing */
2791 dst_reg->umin_value = 0;
2792 dst_reg->umax_value = U64_MAX;
2794 /* Cannot overflow (as long as bounds are consistent) */
2795 dst_reg->umin_value -= umax_val;
2796 dst_reg->umax_value -= umin_val;
2798 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
2801 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
2802 if (smin_val < 0 || dst_reg->smin_value < 0) {
2803 /* Ain't nobody got time to multiply that sign */
2804 __mark_reg_unbounded(dst_reg);
2805 __update_reg_bounds(dst_reg);
2808 /* Both values are positive, so we can work with unsigned and
2809 * copy the result to signed (unless it exceeds S64_MAX).
2811 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
2812 /* Potential overflow, we know nothing */
2813 __mark_reg_unbounded(dst_reg);
2814 /* (except what we can learn from the var_off) */
2815 __update_reg_bounds(dst_reg);
2818 dst_reg->umin_value *= umin_val;
2819 dst_reg->umax_value *= umax_val;
2820 if (dst_reg->umax_value > S64_MAX) {
2821 /* Overflow possible, we know nothing */
2822 dst_reg->smin_value = S64_MIN;
2823 dst_reg->smax_value = S64_MAX;
2825 dst_reg->smin_value = dst_reg->umin_value;
2826 dst_reg->smax_value = dst_reg->umax_value;
2830 if (src_known && dst_known) {
2831 __mark_reg_known(dst_reg, dst_reg->var_off.value &
2832 src_reg.var_off.value);
2835 /* We get our minimum from the var_off, since that's inherently
2836 * bitwise. Our maximum is the minimum of the operands' maxima.
2838 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
2839 dst_reg->umin_value = dst_reg->var_off.value;
2840 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
2841 if (dst_reg->smin_value < 0 || smin_val < 0) {
2842 /* Lose signed bounds when ANDing negative numbers,
2843 * ain't nobody got time for that.
2845 dst_reg->smin_value = S64_MIN;
2846 dst_reg->smax_value = S64_MAX;
2848 /* ANDing two positives gives a positive, so safe to
2849 * cast result into s64.
2851 dst_reg->smin_value = dst_reg->umin_value;
2852 dst_reg->smax_value = dst_reg->umax_value;
2854 /* We may learn something more from the var_off */
2855 __update_reg_bounds(dst_reg);
2858 if (src_known && dst_known) {
2859 __mark_reg_known(dst_reg, dst_reg->var_off.value |
2860 src_reg.var_off.value);
2863 /* We get our maximum from the var_off, and our minimum is the
2864 * maximum of the operands' minima
2866 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
2867 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
2868 dst_reg->umax_value = dst_reg->var_off.value |
2869 dst_reg->var_off.mask;
2870 if (dst_reg->smin_value < 0 || smin_val < 0) {
2871 /* Lose signed bounds when ORing negative numbers,
2872 * ain't nobody got time for that.
2874 dst_reg->smin_value = S64_MIN;
2875 dst_reg->smax_value = S64_MAX;
2877 /* ORing two positives gives a positive, so safe to
2878 * cast result into s64.
2880 dst_reg->smin_value = dst_reg->umin_value;
2881 dst_reg->smax_value = dst_reg->umax_value;
2883 /* We may learn something more from the var_off */
2884 __update_reg_bounds(dst_reg);
2887 if (umax_val >= insn_bitness) {
2888 /* Shifts greater than 31 or 63 are undefined.
2889 * This includes shifts by a negative number.
2891 mark_reg_unknown(env, regs, insn->dst_reg);
2894 /* We lose all sign bit information (except what we can pick up from var_off) */
2897 dst_reg->smin_value = S64_MIN;
2898 dst_reg->smax_value = S64_MAX;
2899 /* If we might shift our top bit out, then we know nothing */
2900 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
2901 dst_reg->umin_value = 0;
2902 dst_reg->umax_value = U64_MAX;
2904 dst_reg->umin_value <<= umin_val;
2905 dst_reg->umax_value <<= umax_val;
2908 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
2910 dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
2911 /* We may learn something more from the var_off */
2912 __update_reg_bounds(dst_reg);
2915 if (umax_val >= insn_bitness) {
2916 /* Shifts greater than 31 or 63 are undefined.
2917 * This includes shifts by a negative number.
2919 mark_reg_unknown(env, regs, insn->dst_reg);
2922 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
2923 * be negative, then either:
2924 * 1) src_reg might be zero, so the sign bit of the result is
2925 * unknown, so we lose our signed bounds
2926 * 2) it's known negative, thus the unsigned bounds capture the signed bounds
2928 * 3) the signed bounds cross zero, so they tell us nothing
2930 * If the value in dst_reg is known nonnegative, then again the
2931 * unsigned bounds capture the signed bounds.
2932 * Thus, in all cases it suffices to blow away our signed bounds
2933 * and rely on inferring new ones from the unsigned bounds and
2934 * var_off of the result.
2936 dst_reg->smin_value = S64_MIN;
2937 dst_reg->smax_value = S64_MAX;
2939 dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2942 dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
2943 dst_reg->umin_value >>= umax_val;
2944 dst_reg->umax_value >>= umin_val;
2945 /* We may learn something more from the var_off */
2946 __update_reg_bounds(dst_reg);
2949 mark_reg_unknown(env, regs, insn->dst_reg);
2953 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2954 /* 32-bit ALU ops are (32,32)->32 */
2955 coerce_reg_to_size(dst_reg, 4);
2956 coerce_reg_to_size(&src_reg, 4);
2959 __reg_deduce_bounds(dst_reg);
2960 __reg_bound_offset(dst_reg);
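/*
 * Illustrative sketch (fragment): the bounds tracking performed above.
 * Assume r2 starts as an unknown scalar (e.g. loaded from the packet).
 * "r2 &= 0xff" gives var_off a 0xff mask and hence umin = 0, umax = 255;
 * "r2 <<= 2" keeps it bounded with umax = 1020.  Such a bounded scalar can
 * then be used as an offset into a sufficiently large map value or packet.
 */
static const struct bpf_insn example_scalar_bounds[] __maybe_unused = {
	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 0xff),	/* r2 in [0, 255] */
	BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 2),		/* r2 in [0, 1020] */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};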
2964 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
2967 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2968 struct bpf_insn *insn)
2970 struct bpf_verifier_state *vstate = env->cur_state;
2971 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2972 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
2973 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2974 u8 opcode = BPF_OP(insn->code);
2976 dst_reg = &regs[insn->dst_reg];
2978 if (dst_reg->type != SCALAR_VALUE)
2980 if (BPF_SRC(insn->code) == BPF_X) {
2981 src_reg = &regs[insn->src_reg];
2982 if (src_reg->type != SCALAR_VALUE) {
2983 if (dst_reg->type != SCALAR_VALUE) {
2984 /* Combining two pointers by any ALU op yields
2985 * an arbitrary scalar. Disallow all math except
2986 * pointer subtraction
2988 if (opcode == BPF_SUB) {
2989 mark_reg_unknown(env, regs, insn->dst_reg);
2992 verbose(env, "R%d pointer %s pointer prohibited\n",
2994 bpf_alu_string[opcode >> 4]);
2997 /* scalar += pointer
2998 * This is legal, but we have to reverse our
2999 * src/dest handling in computing the range
3001 return adjust_ptr_min_max_vals(env, insn,
3004 } else if (ptr_reg) {
3005 /* pointer += scalar */
3006 return adjust_ptr_min_max_vals(env, insn,
3010 /* Pretend the src is a reg with a known value, since we only
3011 * need to be able to read from this state.
3013 off_reg.type = SCALAR_VALUE;
3014 __mark_reg_known(&off_reg, insn->imm);
3016 if (ptr_reg) /* pointer += K */
3017 return adjust_ptr_min_max_vals(env, insn,
3021 /* Got here implies adding two SCALAR_VALUEs */
3022 if (WARN_ON_ONCE(ptr_reg)) {
3023 print_verifier_state(env, state);
3024 verbose(env, "verifier internal error: unexpected ptr_reg\n");
3027 if (WARN_ON(!src_reg)) {
3028 print_verifier_state(env, state);
3029 verbose(env, "verifier internal error: no src_reg\n");
3032 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
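/*
 * Illustrative sketch (fragment): "scalar += pointer" is legal and is handled
 * above by swapping src/dst before calling adjust_ptr_min_max_vals(), so the
 * destination register simply becomes the pointer.  Assume r0 is a
 * null-checked PTR_TO_MAP_VALUE (value_size >= 8) and r2 is a scalar already
 * bounded to [0, 7]; after the addition, r2 carries the map value pointer.
 */
static const struct bpf_insn example_scalar_plus_pointer[] __maybe_unused = {
	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_0),	/* r2 += r0: r2 is now the pointer */
	BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_2, 0),	/* access through r2 */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};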
3035 /* check validity of 32-bit and 64-bit arithmetic operations */
3036 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
3038 struct bpf_reg_state *regs = cur_regs(env);
3039 u8 opcode = BPF_OP(insn->code);
3042 if (opcode == BPF_END || opcode == BPF_NEG) {
3043 if (opcode == BPF_NEG) {
3044 if (BPF_SRC(insn->code) != 0 ||
3045 insn->src_reg != BPF_REG_0 ||
3046 insn->off != 0 || insn->imm != 0) {
3047 verbose(env, "BPF_NEG uses reserved fields\n");
3051 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
3052 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
3053 BPF_CLASS(insn->code) == BPF_ALU64) {
3054 verbose(env, "BPF_END uses reserved fields\n");
3059 /* check src operand */
3060 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3064 if (is_pointer_value(env, insn->dst_reg)) {
3065 verbose(env, "R%d pointer arithmetic prohibited\n",
3070 /* check dest operand */
3071 err = check_reg_arg(env, insn->dst_reg, DST_OP);
3075 } else if (opcode == BPF_MOV) {
3077 if (BPF_SRC(insn->code) == BPF_X) {
3078 if (insn->imm != 0 || insn->off != 0) {
3079 verbose(env, "BPF_MOV uses reserved fields\n");
3083 /* check src operand */
3084 err = check_reg_arg(env, insn->src_reg, SRC_OP);
3088 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
3089 verbose(env, "BPF_MOV uses reserved fields\n");
3094 /* check dest operand */
3095 err = check_reg_arg(env, insn->dst_reg, DST_OP);
3099 if (BPF_SRC(insn->code) == BPF_X) {
3100 if (BPF_CLASS(insn->code) == BPF_ALU64) {
3102 * copy register state to dest reg
3104 regs[insn->dst_reg] = regs[insn->src_reg];
3105 regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
3108 if (is_pointer_value(env, insn->src_reg)) {
3110 "R%d partial copy of pointer\n",
3114 mark_reg_unknown(env, regs, insn->dst_reg);
3115 coerce_reg_to_size(&regs[insn->dst_reg], 4);
3119 * remember the value we stored into this reg
3121 regs[insn->dst_reg].type = SCALAR_VALUE;
3122 if (BPF_CLASS(insn->code) == BPF_ALU64) {
3123 __mark_reg_known(regs + insn->dst_reg,
3126 __mark_reg_known(regs + insn->dst_reg,
3131 } else if (opcode > BPF_END) {
3132 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
3135 } else { /* all other ALU ops: and, sub, xor, add, ... */
3137 if (BPF_SRC(insn->code) == BPF_X) {
3138 if (insn->imm != 0 || insn->off != 0) {
3139 verbose(env, "BPF_ALU uses reserved fields\n");
3142 /* check src1 operand */
3143 err = check_reg_arg(env, insn->src_reg, SRC_OP);
3147 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
3148 verbose(env, "BPF_ALU uses reserved fields\n");
3153 /* check src2 operand */
3154 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3158 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
3159 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
3160 verbose(env, "div by zero\n");