优化的地方如下:
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5159c7a229229..7c130001fbfe7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1234,13 +1234,11 @@ bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
}
static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
- bool *regs_used, bool *tail_call_seen)
+ bool *regs_used)
{
int i;
for (i = 1; i <= insn_cnt; i++, insn++) {
- if (insn->code == (BPF_JMP | BPF_TAIL_CALL))
- *tail_call_seen = true;
if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
regs_used[0] = true;
if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
@@ -1324,7 +1322,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
struct bpf_insn *insn = bpf_prog->insnsi;
bool callee_regs_used[4] = {};
int insn_cnt = bpf_prog->len;
- bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
u64 arena_vm_start, user_vm_start;
@@ -1336,11 +1333,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
- detect_reg_usage(insn, insn_cnt, callee_regs_used,
- &tail_call_seen);
-
- /* tail call's presence in current prog implies it is reachable */
- tail_call_reachable |= tail_call_seen;
+ detect_reg_usage(insn, insn_cnt, callee_regs_used);
emit_prologue(&prog, bpf_prog->aux->stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
在 x86 BPF JIT 中,detect_reg_usage()
函数中不再需要检查 tail_call_seen
了,因为在 verifier 里已能够正确地提供 tail_call_reachable
信息了:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 81a3d2ced78d5..d7045676246a7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2982,8 +2982,10 @@ static int check_subprogs(struct bpf_verifier_env *env)
if (code == (BPF_JMP | BPF_CALL) &&
insn[i].src_reg == 0 &&
- insn[i].imm == BPF_FUNC_tail_call)
+ insn[i].imm == BPF_FUNC_tail_call) {
subprog[cur_subprog].has_tail_call = true;
+ subprog[cur_subprog].tail_call_reachable = true;
+ }
if (BPF_CLASS(code) == BPF_LD &&
(BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
subprog[cur_subprog].has_ld_abs = true;
为什么如此修改便能优化掉 detect_reg_usage()
中的 tail_call_seen
了呢?
因为 verifier 在进行 JIT 前,是如此给 JIT 提供 tail_call_reachable
信息:
// https://github.com/torvalds/linux/blob/c3f2d783a459980eafd24c5af94ccd56a615961f/kernel/bpf/verifier.c#L19365
static int jit_subprogs(struct bpf_verifier_env *env)
{
// ...
for (i = 0; i < env->subprog_cnt; i++) {
// ...
func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
// ...
func[i] = bpf_int_jit_compile(func[i]);
if (!func[i]->jited) {
err = -ENOTSUPP;
goto out_free;
}
cond_resched();
}
// ...
}