接着是跟踪网卡的 net.ipv4.conf.*.* sysctl 的配置变更。
ipv4 sysctl 配置变更的内核函数
ipv4 sysctl 配置变更的方式如下:
- echo 1 > /proc/sys/net/ipv4/conf/all/${devconf}
- echo 1 > /proc/sys/net/ipv4/conf/default/${devconf}
- echo 1 > /proc/sys/net/ipv4/conf/${NET_DEV}/${devconf}
直接查看对应的内核源代码 net/ipv4/devinet.c
吧。
找到如下源代码:
// https://github.com/torvalds/linux/blob/master/net/ipv4/devinet.c
/*
 * devinet_conf_proc - proc handler for an integer net.ipv4.conf.* entry.
 *
 * This is an excerpt: the "// ..." line marks code elided from the kernel
 * source, which is why the braces below do not balance.
 *
 * The read/write itself is delegated to proc_dointvec(), whose result is
 * returned unchanged. On a write the handler additionally sets the entry's
 * bit in cnf->state and, for rp_filter, sends a netconf notification when
 * the value actually changed.
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
// Snapshot the value before proc_dointvec() gets a chance to overwrite it.
int old_value = *(int *)ctl->data;
int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
// Value stored in the entry once proc_dointvec() has run.
int new_value = *(int *)ctl->data;
if (write) {
// extra1 / extra2 carry the owning ipv4_devconf and the network namespace.
struct ipv4_devconf *cnf = ctl->extra1;
struct net *net = ctl->extra2;
// Zero-based slot of this entry inside cnf->data[] (int pointer difference).
int i = (int *)ctl->data - cnf->data;
int ifindex;
set_bit(i, cnf->state);
// The slot index is compared against IPV4_DEVCONF_RP_FILTER - 1, i.e. the
// IPV4_DEVCONF_* id is one greater than the slot index.
if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_RP_FILTER,
ifindex, cnf);
}
// ...
}
}
return ret;
}
/*
 * devinet_sysctl_forward - proc handler registered for the "forwarding"
 * entry in devinet_sysctl below.
 *
 * Excerpt only: the declarations of ret, valp and val, and the body of the
 * write path, are elided ("// ...").
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
// ...
ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
// NOTE(review): presumably valp points at ctl->data and val holds the value
// from before the write, so this branch runs only when a write changed the
// value - confirm against the full source in devinet.c.
if (write && *valp != val) {
// ...
}
return ret;
}
/*
 * ipv4_doint_and_flush - proc handler that calls rt_cache_flush(net) after
 * a write for which *valp differs from val.
 *
 * Excerpt only: the declarations of ret, valp, val and net, and the call
 * that performs the actual read/write, are elided ("// ...").
 */
static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
// ...
// NOTE(review): presumably val is the value from before the write, so the
// flush happens only when the value really changed - confirm in devinet.c.
if (write && *valp != val)
rt_cache_flush(net);
return ret;
}
/*
 * devinet_sysctl - template table of the net.ipv4.conf.* sysctl entries
 * (excerpt; "// ..." marks elided entries).
 *
 * Each entry is declared with a DEVINET_SYSCTL_*_ENTRY macro that takes the
 * IPV4_DEVCONF_* suffix and the file name of the entry.
 */
static struct devinet_sysctl_table {
struct ctl_table_header *sysctl_header;
struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
} devinet_sysctl = {
.devinet_vars = {
// "forwarding" is the one entry shown here with an explicit handler.
DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
devinet_sysctl_forward),
// NOTE(review): the RO/RW macros presumably install devinet_conf_proc as
// the handler - the macro definitions are not part of this excerpt.
DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
// ...
// NOTE(review): FLUSHING entries presumably use ipv4_doint_and_flush - confirm.
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
// ...
},
};
使用 bpftrace
确认一下:
# bpftrace -l 'k:devinet_conf_proc'
kprobe:devinet_conf_proc
# bpftrace -l 'k:devinet_sysctl_forward'
kprobe:devinet_sysctl_forward
# bpftrace -l 'k:ipv4_doint_and_flush'
kprobe:ipv4_doint_and_flush
# bpftrace -e 'k:devinet_conf_proc, k:devinet_sysctl_forward, k:ipv4_doint_and_flush { printf("ipv4 sysctl: %s\n", comm); }'
Attaching 3 probes...
分析其中的 devinet_conf_proc 函数:
- sysctl 配置变更的值是通过 ctl->data 获取的,类型是 int。
- sysctl 配置项的索引是通过 (int *)ctl->data - cnf->data 获取的;需要 +1 后才是正确的索引定义(对应上面源码中的 i == IPV4_DEVCONF_RP_FILTER - 1)。
- sysctl 配置的 ifindex 是通过 devinet_conf_ifindex() 获取的,可能是 ALL/DEFAULT/ifindex。
跟踪 ipv4 sysctl 配置变更函数
直接对 devinet_conf_proc、devinet_sysctl_forward、ipv4_doint_and_flush 这三个函数进行 fexit 跟踪,以便观测配置变更的情况。
/*
 * __fexit - common body shared by the fexit programs in this file.
 *
 * @ctx:    program context, passed through to handle_event()
 * @ctl:    sysctl table entry the traced handler was called with
 * @write:  non-zero when the traced call was a write
 * @buffer, @lenp, @ppos: remaining handler arguments (unused here)
 * @retval: return value of the traced handler
 *
 * Reads and failed calls are ignored. For a successful write one event is
 * filled in and handed to handle_event(). The configuration index is not
 * computed here: the event carries both the address of cnf->data and the
 * address of the written entry (ctl->data), together with the entry's
 * current value.
 *
 * Always returns BPF_OK.
 */
static __always_inline int
__fexit(void *ctx, struct ctl_table *ctl, int write, void *buffer, size_t *lenp,
        loff_t *ppos, int retval)
{
    struct event ev = {};
    struct ipv4_devconf *devconf;
    struct net *netns;
    void *data;

    /* Only interested in successful writes of devinet conf. */
    if (!write || retval != 0)
        return BPF_OK;

    devconf = (struct ipv4_devconf *) BPF_CORE_READ(ctl, extra1);
    netns = (struct net *) BPF_CORE_READ(ctl, extra2);
    data = BPF_CORE_READ(ctl, data);

    /* Addresses needed to derive the index: entry address and array base. */
    ev.ctl_data_ptr = (__u64) data;
    ev.cnf_data_ptr = (__u64) devconf + offsetof(struct ipv4_devconf, data);

    ev.ifindex = devinet_conf_ifindex(netns, devconf);

    /* Copy the value now stored in the entry out of kernel memory. */
    bpf_probe_read_kernel(&ev.devconf_value, sizeof(ev.devconf_value), data);

    handle_event(ctx, &ev);
    return BPF_OK;
}
// fexit program attached to devinet_conf_proc(): takes the traced function's
// arguments followed by its return value and forwards all of them to __fexit().
SEC("fexit/devinet_conf_proc")
int BPF_PROG(fexit_devinet_conf_proc, struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos, int retval)
{
return __fexit(ctx, ctl, write, buffer, lenp, ppos, retval);
}
// fexit program attached to ipv4_doint_and_flush(): takes the traced function's
// arguments followed by its return value and forwards all of them to __fexit().
SEC("fexit/ipv4_doint_and_flush")
int BPF_PROG(fexit_ipv4_doint_and_flush, struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos, int retval)
{
return __fexit(ctx, ctl, write, buffer, lenp, ppos, retval);
}
// fexit program attached to devinet_sysctl_forward(): takes the traced function's
// arguments followed by its return value and forwards all of them to __fexit().
SEC("fexit/devinet_sysctl_forward")
int BPF_PROG(fexit_devinet_sysctl_forward, struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos, int retval)
{
return __fexit(ctx, ctl, write, buffer, lenp, ppos, retval);
}
其中,不要傻傻地在 bpf 里计算配置项索引:bpf 程序只上报 cnf_data_ptr 和 ctl_data_ptr 两个地址,配置项索引留待用户态程序计算,即 (ctl_data_ptr - cnf_data_ptr) / sizeof(int) + 1。
跑起来后:
$ sudo zsh -c 'echo 0 > /proc/sys/net/ipv4/conf/all/forwarding'
$ sudo zsh -c 'echo 0 > /proc/sys/net/ipv4/conf/default/forwarding'
# or
$ sudo sysctl -w net.ipv4.conf.all.forwarding=0
net.ipv4.conf.all.forwarding = 0
$ sudo sysctl -w net.ipv4.conf.default.forwarding=0
net.ipv4.conf.default.forwarding = 0
$ sudo ./fexit_ipv4_sysctl
2024/07/07 13:35:21 Attached fexit(devinet_conf_proc)
2024/07/07 13:35:21 Attached fexit(ipv4_doint_and_flush)
2024/07/07 13:35:21 Attached fexit(devinet_sysctl_forward)
2024/07/07 13:35:23 Update forwarding to 0 on interface ALL(-1) by process /usr/bin/zsh
2024/07/07 13:35:26 Update forwarding to 0 on interface DEFAULT(-2) by process /usr/bin/zsh
完整的源代码:fexit_ipv4_sysctl[1]。
总结
跟踪 ipv4 sysctl 配置变更的方式和跟踪 RPS/XPS 配置变更的方式类似,使用 fexit 而不是 kprobe。
在 bpf 代码里,需要注意 ifindex 的获取;配置项索引的计算则交给用户态程序完成。
[1] fexit_ipv4_sysctl: https://github.com/Asphaltt/learn-by-example/tree/main/ebpf/fexit_ipv4_sysctl