内核源码中放置很多静态跟踪节点,这些节点可以被关联到对应的回调函数。当我想要调试某个子系统/模块时,通过debug系统将对应的节点开启(将回调函数挂钩子到静态跟踪点上,与之关联),这样内核在执行到跟踪点位置的时候会调用钩子函数,最终执行结果将被输出写到一个环形日志缓存区里,通过debug系统查看信息。
我把它当作一种内核层面的调试手段。当我想要调试某一个模块,开启对应的节点就好了,trace event基于现有的跟踪节点(一般是写代码的添加好的)效率高些,或者解决新的bug时将关键信息固化到调试系统里。
接下来学习 trace_event 的工作原理和使用方法,希望未来能积累足够多的使用案例。
mount -t tracefs nodev /sys/kernel/tracing # 挂载tracefs
cat /sys/kernel/debug/tracing/available_events # 查看当前支持的跟踪事件列表
echo 1 > /sys/kernel/debug/tracing/events/gsgpu/enable && \
echo 0 > /sys/kernel/debug/tracing/trace && \
/root/run_test.sh ; cp /sys/kernel/debug/tracing/trace /test.txt && \
echo 0 > /sys/kernel/debug/tracing/events/gsgpu/enable
cat /test.txt
echo 0 > /sys/kernel/debug/tracing/events/gsgpu/enable && \
echo 1 > /sys/kernel/debug/tracing/events/gsgpu/gsgpu_bo_move/enable && \
echo 0 > /sys/kernel/debug/tracing/trace && \
echo "bo_size >= 50000" > /sys/kernel/debug/tracing/events/gsgpu/gsgpu_bo_move/filter && \
/root/run_test.sh ; cp /sys/kernel/debug/tracing/trace /test.txt && \
echo 0 > /sys/kernel/debug/tracing/events/gsgpu/gsgpu_bo_move/filter
cat /test.txt
echo 0 > /sys/kernel/debug/tracing/events/gsgpu/enable && \
echo 1 > /sys/kernel/debug/tracing/events/gsgpu/gsgpu_bo_move/enable && \
echo 0 > /sys/kernel/debug/tracing/trace && \
echo 'stacktrace' > /sys/kernel/debug/tracing/events/gsgpu/gsgpu_bo_move/trigger && \
/root/run_test.sh ; cp /sys/kernel/debug/tracing/trace /test.txt && \
echo '!stacktrace' > /sys/kernel/debug/tracing/events/gsgpu/gsgpu_bo_move/trigger && \
cat /test.txt
# tracer: nop
#
# entries-in-buffer/entries-written: 20103/1994095 #P:4
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |-0 [002] d.h. 6972.008532: gsgpu_iv: client_id:4 src_id:0 ring:0 vmid:0 timestamp: 10376293560622087932 pasid:2415919108 src_data: 00000000 76089440 90000004 00000000
root@loongson-pc:/sys/kernel/debug/tracing/events/gsgpu# cat ./gsgpu_iv/format
name: gsgpu_iv
ID: 1903
format:field:unsigned short common_type; offset:0; size:2; signed:0;field:unsigned char common_flags; offset:2; size:1; signed:0;field:unsigned char common_preempt_count; offset:3; size:1; signed:0;field:int common_pid; offset:4; size:4; signed:1;field:unsigned client_id; offset:8; size:4; signed:0;field:unsigned src_id; offset:12; size:4; signed:0;field:unsigned ring_id; offset:16; size:4; signed:0;field:unsigned vmid; offset:20; size:4; signed:0;field:unsigned vmid_src; offset:24; size:4; signed:0;field:uint64_t timestamp; offset:32; size:8; signed:0;field:unsigned timestamp_src; offset:40; size:4; signed:0;field:unsigned pasid; offset:44; size:4; signed:0;field:unsigned src_data[4]; offset:48; size:16; signed:0;print fmt: "client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x
", REC->client_id, REC->src_id, REC->ring_id, REC->vmid, REC->timestamp, REC->pasid, REC->src_data[0], REC->src_data[1], REC->src_data[2], REC->src_data[3]
/* src file: "drivers/gpu/drm/gsgpu/include/gsgpu_trace.h" */TRACE_EVENT(gsgpu_iv, // 名称TP_PROTO(struct gsgpu_iv_entry *iv), // 原型TP_ARGS(iv), // 原型使用的参数TP_STRUCT__entry(__field(unsigned, client_id)__field(unsigned, src_id)__field(unsigned, ring_id)__field(unsigned, vmid)__field(unsigned, vmid_src)__field(uint64_t, timestamp)__field(unsigned, timestamp_src)__field(unsigned, pasid)__array(unsigned, src_data, 4)), // 描述将存储在跟踪程序环缓冲区中的数据的结构布局TP_fast_assign(__entry->client_id = iv->client_id;__entry->src_id = iv->src_id;__entry->ring_id = iv->ring_id;__entry->vmid = iv->vmid;__entry->vmid_src = iv->vmid_src;__entry->timestamp = iv->timestamp;__entry->timestamp_src = iv->timestamp_src;__entry->pasid = iv->pasid;__entry->src_data[0] = iv->src_data[0];__entry->src_data[1] = iv->src_data[1];__entry->src_data[2] = iv->src_data[2];__entry->src_data[3] = iv->src_data[3];), // 定义将来自参数的数据保存到环形缓冲区的方式TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n",__entry->client_id, __entry->src_id,__entry->ring_id, __entry->vmid,__entry->timestamp, __entry->pasid,__entry->src_data[0], __entry->src_data[1],__entry->src_data[2], __entry->src_data[3]) // 定义了如何使用printk()打印TP_STRUCT__entry结构中的字段
);/* src file: "drivers/gpu/drm/gsgpu/gpu/gsgpu_irq.c" */
void gsgpu_irq_dispatch(struct gsgpu_device *adev,struct gsgpu_iv_entry *entry)
{// .....trace_gsgpu_iv(entry); // 插桩:桩函数// .....
}
0、参考内核中已经存在的用例samples/trace_events
1、首先需要在include/trace/events/
文件夹下添加一个自己event的头文件
include/trace/events/sched.h
2、在探测点位置中调用桩函数,需要遵循注释的标准格式
kernel/sched/core.c
wangjianfeng@dev:~/source/linux-4.19-loongson$ git diff
diff --git a/drivers/gpu/drm/gsgpu/gpu/gfx_v8_0.c b/drivers/gpu/drm/gsgpu/gpu/gfx_v8_0.c
index b6e14e4c5b40..7a02414475f0 100644
--- a/drivers/gpu/drm/gsgpu/gpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/gsgpu/gpu/gfx_v8_0.c
@@ -26,6 +26,7 @@#include "gsgpu_gfx.h"#include "vi.h"#include "gsgpu_cp.h"
+#include "gsgpu_trace.h"#include "ivsrcid/ivsrcid_vislands30.h"@@ -569,6 +570,7 @@ static void gfx_v8_0_ring_emit_rreg(struct gsgpu_ring *ring, uint32_t reg)static void gfx_v8_0_ring_emit_wreg(struct gsgpu_ring *ring, uint32_t reg,uint32_t val){
+ trace_ring_emit_wreg(ring, reg, val);gsgpu_ring_write(ring, GSPKT(GSPKT_WRITE, 2) | WRITE_DST_SEL(0) | WRITE_WAIT);gsgpu_ring_write(ring, reg);gsgpu_ring_write(ring, val);
diff --git a/drivers/gpu/drm/gsgpu/include/gsgpu_trace.h b/drivers/gpu/drm/gsgpu/include/gsgpu_trace.h
index a9bb78d64a5e..ad8566a2260c 100644
--- a/drivers/gpu/drm/gsgpu/include/gsgpu_trace.h
+++ b/drivers/gpu/drm/gsgpu/include/gsgpu_trace.h
@@ -464,6 +464,25 @@ TRACE_EVENT(gsgpu_bo_move,__entry->new_placement, __entry->bo_size));+TRACE_EVENT(ring_emit_wreg,
+ TP_PROTO(struct gsgpu_ring *ring, uint32_t reg, uint32_t val),
+ TP_ARGS(ring, reg, val),
+ TP_STRUCT__entry(
+ __field(struct gsgpu_ring *, ring)
+ __field(uint32_t , reg)
+ __field(uint32_t , val)
+ ),
+ TP_fast_assign(
+ __entry->ring = ring;
+ __entry->reg = reg;
+ __entry->val = val;
+ ),
+ TP_printk("ring=%p, reg=0x%08x, val=0x%08x",
+ __entry->ring,
+ __entry->reg,
+ __entry->val)
+);
+#undef GSGPU_JOB_GET_TIMELINE_NAME#endif
1、是什么? => trace event 就是利用 ftrace 框架,实现低性能损耗,对执行流无影响的一种信息输出机制
2、背景和发展历程 => 谁引入?何时引入?用于什么场景?
3、为什么要有这样的机制? => 提供一种内核调试的解决方案,解决了一些printk手段的缺陷
4、代码实现在哪?=> 开源参考samples/trace_events
5、如何使用?=> 参考内核文档
6、应用案例?=> 一次解决Linux内核内存泄漏实战全过程;利用Ftrace追踪一次系统调用的时间分布;
7、Ftrace和trace event的区别?=> Ftrace是通用的调试框架(提供函数钩子的基础设施;基于 tracefs 文件系统的 trace 框架),trace event是利用 ftrace 框架实现的一种信息输出机制
8、trace event跟踪点的位置如何执行到插桩函数?关键改动有哪些?=> 参考trace event原理与实现的内容
9、trace evnet过滤器filter和触发器trigger都有设置的情况下,哪一个优先级更高?=> 测试结果:触发器处理过滤器之后的信息
Doc | Event Tracing
阅码场 | Linux内核tracers的实现原理与应用
张乐 | 模块化学习办法
泰晓科技 | trace-event 基本用法
pwl999 | Linux ftrace 1.2、trace event
Doc | Using the Linux Kernel Tracepoints
LWN | Using the TRACE_EVENT() macro (Part 1)
SRC | samples/trace_events
张乐 | amdgpu调试接口