目录
参考
https://docs.kernel.org/trace/user_events.html
测试程序
tools/testing/selftests/user_events/ftrace_test.c
正文
通过user_event可以实现对应用程序的跟踪,类似linux内核中的tracepoint那样。相似的方法还有借助/sys/kernel/debug/tracing/trace_marker
,不过,user_event提供的方法功能更加强大,可以配合perf等工具使用。
要使用user_event,大致要经过:注册、使能跟踪、写入跟踪信息、读取跟踪信息、关闭跟踪、反注册、删除。需要用到的文件节点主要是:/sys/kernel/tracing/user_events_data
。
打开文件节点
data_fd = open(data_file, O_RDWR);
注册和反注册
注册阶段用到的结构体是user_reg:
struct user_reg { /* Input:固定为sizeof(user_reg)*/ __u32 size; /* Input: 使用下面的enable_addr的哪个bit来显示使能状态 */ __u8 enable_bit; /* Input: 下面的enable_addr的字节长度,4或者8 */ __u8 enable_size; /* Input: 目前还用不到,设置为0即可 */ __u16 flags; /* Input: 应用通过这个地址来查看该user_event事件是否使能*/ __u64 enable_addr; /* Input: 事件描述信息*/ __u64 name_args; /* Output: 应用通过该事件索引向内核中写入事件内容 */ __u32 write_index; } __attribute__((__packed__));
反注册用到的结构体是:
struct user_unreg { /* Input: 固定为sizeof(user_unreg) */ __u32 size; /* Input: Bit to unregister */ __u8 disable_bit; /* Input: Reserved, set to 0 */ __u8 __reserved; /* Input: Reserved, set to 0 */ __u16 __reserved2; /* Input: Address to unregister */ __u64 disable_addr; } __attribute__((__packed__));
比如以下面的代码为例:
// 这两个结构体必须初始化为0 struct user_reg reg = {0}; struct user_unreg unreg = {0}; u32 check; reg.size = sizeof(reg); reg.name_args = (__u64)"__test_event0 u32 id; u32 age; char type; char[32] name"; reg.enable_bit = 31; // 通过check的bit31来查看使能状态,不同的事件可以使用同一个变量 reg.enable_addr = (__u64)✓ reg.enable_size = sizeof(check); ioctl(data_fd, DIAG_IOCSREG, ®); // 如果传入的reg的内容一样,那么返回的write_index也一样 event0_index = reg.write_index; reg.size = sizeof(reg); reg.name_args = (__u64)"__test_event1 u32 money"; reg.enable_bit = 30; // 通过check的bit30来查看使能状态 reg.enable_addr = (__u64)check; reg.enable_size = sizeof(check); ioctl(data_fd, DIAG_IOCSREG, ®); // 如果传入的reg的内容一样,那么返回的write_index也一样 event1_index = reg.write_index;
注册成功后,可以看到注册的事件:
- 在
/sys/kernel/debug/tracing/dynamic_events
下面:
- 在
/sys/kernel/debug/tracing/events/user_events
下面:
- 可以查看一下format的内容,跟注册时设置的
name_args
是一致的
user_events/__test_event0:
user_events/__test_event1:
使能跟踪和关闭跟踪
/sys/kernel/debug/tracing/events/user_events/__test_event0/enable
或者
/sys/kernel/debug/tracing/events/user_events/__test_event1/enable
当改变上面的enable
文件节点的值时,check变量的相应bit会随之改变。具体是:
向__test_event0/enable
写入0,那么check变量的bit31是0,向__test_event0/enable
写入1,那么check变量的bit31是1.
向__test_event1/enable
写入0,那么check变量的bit30是0,向__test_event0/enable
写入1,那么check变量的bit30是1.
写入事件
char name[20] = "I am event0"; io[0].iov_base = &event0_index; io[0].iov_len = sizeof(event0_index); io[1].iov_base = &id; io[1].iov_len = sizeof(id); io[2].iov_base = &age; io[2].iov_len = sizeof(age); io[3].iov_base = &type; io[3].iov_len = sizeof(type); io[4].iov_base = name; io[4].iov_len = sizeof(name); // 使能 system("echo 1 > /sys/kernel/debug/tracing/events/user_events/__test_event0/enable"); // 写入 writev(data_fd, (const struct iovec *)io, 5); io[0].iov_base = &event1_index; io[0].iov_len = sizeof(event1_index); io[1].iov_base = &money; io[1].iov_len = sizeof(money); // 使能 system("echo 1 > /sys/kernel/debug/tracing/events/user_events/__test_event1/enable"); // 写入 writev(data_fd, (const struct iovec *)io, 2);
读取跟踪日志、设置过滤以及trigger
# perf trace -e user_events:__test_event0 -e user_events:__test_event1
或者
# cat /sys/kernel/debug/tracing/trace_pipe
可以看到如下日志:
此外,我们还可以利用这个event的filter
和trigger
节点控制跟踪的条件:
比如对__test_event0实现下面的两个控制:
- 当age大于10,小于15时,不记录跟踪日志
- 当age等于20时,记录调用栈
下面是实现这两个条件的命令:
# cd /sys/kernel/debug/tracing/events/user_events/__test_event0 # echo 'age <= 10 || age >= 15' > filter # cat filter age <= 10 || age >= 15 # echo 'stacktrace if age == 20' > trigger # cat trigger stacktrace:unlimited if age == 20
下面是跟踪日志:
... ... user_events_dem-542 [003] ..... 1125.702619: __test_event0: name=I am event0 type='E'(69) age=0x6 (6) id=0x10 (16) user_events_dem-542 [003] ..... 1125.702837: __test_event1: name=I am event1 money=0x6 (6) user_events_dem-542 [003] ..... 1128.141755: __test_event0: name=I am event0 type='E'(69) age=0x7 (7) id=0x10 (16) user_events_dem-542 [003] ..... 1128.141953: __test_event1: name=I am event1 money=0x7 (7) user_events_dem-542 [003] ..... 1129.022463: __test_event0: name=I am event0 type='E'(69) age=0x8 (8) id=0x10 (16) user_events_dem-542 [003] ..... 1129.022682: __test_event1: name=I am event1 money=0x8 (8) user_events_dem-542 [003] ..... 1130.070759: __test_event0: name=I am event0 type='E'(69) age=0x9 (9) id=0x10 (16) user_events_dem-542 [003] ..... 1130.070904: __test_event1: name=I am event1 money=0x9 (9) user_events_dem-542 [003] ..... 1130.630082: __test_event0: name=I am event0 type='E'(69) age=0xa (10) id=0x10 (16) user_events_dem-542 [003] ..... 1130.630428: __test_event1: name=I am event1 money=0xa (10) user_events_dem-542 [003] ..... 1131.285824: __test_event1: name=I am event1 money=0xb (11) user_events_dem-542 [003] ..... 1132.414520: __test_event1: name=I am event1 money=0xc (12) user_events_dem-542 [003] ..... 1133.022232: __test_event1: name=I am event1 money=0xd (13) user_events_dem-542 [003] ..... 1133.727074: __test_event1: name=I am event1 money=0xe (14) user_events_dem-542 [003] ..... 1134.694207: __test_event0: name=I am event0 type='E'(69) age=0xf (15) id=0x10 (16) user_events_dem-542 [003] ..... 1134.694394: __test_event1: name=I am event1 money=0xf (15) user_events_dem-542 [003] ..... 1136.030015: __test_event0: name=I am event0 type='E'(69) age=0x10 (16) id=0x10 (16) user_events_dem-542 [003] ..... 1136.030468: __test_event1: name=I am event1 money=0x10 (16) user_events_dem-542 [003] ..... 1136.630760: __test_event0: name=I am event0 type='E'(69) age=0x11 (17) id=0x10 (16) user_events_dem-542 [003] ..... 1136.630908: __test_event1: name=I am event1 money=0x11 (17) user_events_dem-542 [000] ..... 1137.221809: __test_event0: name=I am event0 type='E'(69) age=0x12 (18) id=0x10 (16) user_events_dem-542 [000] ..... 1137.221880: __test_event1: name=I am event1 money=0x12 (18) user_events_dem-542 [000] ..... 1137.766662: __test_event0: name=I am event0 type='E'(69) age=0x13 (19) id=0x10 (16) user_events_dem-542 [000] ..... 1137.766844: __test_event1: name=I am event1 money=0x13 (19) user_events_dem-542 [000] ..... 1138.261930: __test_event0: name=I am event0 type='E'(69) age=0x14 (20) id=0x10 (16) user_events_dem-542 [000] ...1. 1138.262064: <stack trace> => user_event_ftrace => user_events_write_core.isra.22 => do_iter_readv_writev => do_iter_write => vfs_writev => do_writev => do_syscall_64 => entry_SYSCALL_64_after_hwframe user_events_dem-542 [000] ..... 1138.262076: __test_event1: name=I am event1 money=0x14 (20) user_events_dem-542 [000] ..... 1138.925935: __test_event0: name=I am event0 type='E'(69) age=0x15 (21) id=0x10 (16) user_events_dem-542 [000] ..... 1138.926231: __test_event1: name=I am event1 money=0x15 (21) user_events_dem-542 [000] ..... 1139.350572: __test_event0: name=I am event0 type='E'(69) age=0x16 (22) id=0x10 (16) user_events_dem-542 [000] ..... 1139.350825: __test_event1: name=I am event1 money=0x16 (22) ... ...
注意:
需要先把__test_event0
的filter和trigger清空才能反注册和删除这个probe。
echo > filter echo '!stacktrace' > trigger
反注册
unreg.size = sizeof(unreg); unreg.disable_bit = 31; unreg.disable_addr = (__u64)✓ ioctl(data_fd, DIAG_IOCSUNREG, &unreg); unreg.size = sizeof(unreg); unreg.disable_bit = 30; unreg.disable_addr = (__u64)✓ ioctl(data_fd, DIAG_IOCSUNREG, &unreg);
删除
// 先关闭 close(data_fd); data_fd = open(data_file, O_RDWR); ioctl(data_fd, DIAG_IOCSDEL, "__test_event0"); ioctl(data_fd, DIAG_IOCSDEL, "__test_event1");
下面是完整的测试程序:
#include <errno.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/uio.h> #include <fcntl.h> #include <stdio.h> #include <unistd.h> #include <linux/user_events.h> #include <stdlib.h> #define BIT(n) (1ULL << (n)) const char *data_file = "/sys/kernel/tracing/user_events_data"; int enabled = 0; int main(int argc, const char *argv[]) { int data_fd; struct user_reg reg = {0}; struct user_unreg unreg = {0}; struct iovec io[5]; __u32 check, event0_index, event1_index; int id, age = 0, money = 0; char type = 'E', ch; data_fd = open(data_file, O_RDWR); if (data_fd < 0) { perror("Open failed\n"); return -1; } reg.size = sizeof(reg); reg.name_args = (__u64)"__test_event0 u32 id; u32 age; char type; char[32] name"; reg.enable_bit = 31; // 通过check的bit31来查看使能状态,不同的事件可以使用同一个变量 reg.enable_addr = (__u64)✓ reg.enable_size = sizeof(check); if (ioctl(data_fd, DIAG_IOCSREG, ®) < 0) { printf("reg event0 failed\n"); goto out; } event0_index = reg.write_index; printf("event0_index: %d\n", event0_index); reg.name_args = (__u64)"__test_event1 u32 money; char[32] name"; reg.enable_bit = 30; // 通过check的bit30来查看使能状态 if (ioctl(data_fd, DIAG_IOCSREG, ®) < 0) { printf("reg event1 failed\n"); goto out; } event1_index = reg.write_index; printf("event1_index: %d\n", event1_index); system("echo 1 > /sys/kernel/debug/tracing/events/user_events/__test_event0/enable"); system("echo 1 > /sys/kernel/debug/tracing/events/user_events/__test_event1/enable"); loop: printf("Press enter to write trace data, enter q to exit ..."); ch = getchar(); if (ch == 'q') { system("echo 0 > /sys/kernel/debug/tracing/events/user_events/__test_event0/enable"); system("echo 0 > /sys/kernel/debug/tracing/events/user_events/__test_event1/enable"); unreg.size = sizeof(unreg); unreg.disable_bit = 31; unreg.disable_addr = (__u64)✓ if (ioctl(data_fd, DIAG_IOCSUNREG, &unreg) < 0) { printf("unreg event0 failed\n"); goto out; } printf("unreg event0 success\n"); unreg.disable_bit = 30; if (ioctl(data_fd, DIAG_IOCSUNREG, &unreg) < 0) { printf("unreg event1 failed\n"); goto out; } printf("unreg event1 success\n"); close(data_fd); data_fd = open(data_file, O_RDWR); if (ioctl(data_fd, DIAG_IOCSDEL, "__test_event0") < 0) { printf("del event0 failed\n"); goto out; } printf("del event0 success\n"); if (ioctl(data_fd, DIAG_IOCSDEL, "__test_event1") < 0) { printf("del event1 failed\n"); goto out; } printf("del event1 success\n"); close(data_fd); return 0; } if (check & BIT(31)) { char name[32] = "I am event0"; id = 0x10; io[0].iov_base = &event0_index; io[0].iov_len = sizeof(event0_index); io[1].iov_base = &id; io[1].iov_len = sizeof(id); io[2].iov_base = &age; io[2].iov_len = sizeof(age); io[3].iov_base = &type; io[3].iov_len = sizeof(type); io[4].iov_base = name; io[4].iov_len = sizeof(name); printf("\t write event0\n"); writev(data_fd, (const struct iovec *)io, 5); age++; } if (check & BIT(30)) { char name[32] = "I am event1"; io[0].iov_base = &event1_index; io[0].iov_len = sizeof(event1_index); io[1].iov_base = &money; io[1].iov_len = sizeof(money); io[2].iov_base = &name; io[2].iov_len = sizeof(name); printf("\t write event1\n"); writev(data_fd, (const struct iovec *)io, 3); money++; } goto loop; out: return 0; }