zoukankan      html  css  js  c++  java
  • tracer ftrace笔记(3)——宏展开和hook和注册 Hello

    一、ftrace的宏

    1. struct tracepoint 结构

    使用 struct tracepoint 变量来描述一个 trace point。

    //include/linux/tracepoint-defs.h
    /*
     * Describes a single trace point. DEFINE_TRACE_FN emits one instance
     * named __tracepoint_<name> for every tracepoint it defines.
     */
    struct tracepoint {
        /*
         * Name of the trace point (DEFINE_TRACE_FN points it at the
         * __tpstrtab_<name> string). The original note adds that the kernel
         * manages trace points through a hash table and uses this name to pick
         * the entry inside a slot - not visible in this excerpt, TODO confirm.
         */
        const char *name;
        /*
         * Enable state: initialised to STATIC_KEY_INIT_FALSE and tested with
         * static_key_false(&key), which only checks whether the key is true.
         */
        struct static_key key;
        /* Set to &__SCK__tp_func_<name> by DEFINE_TRACE_FN. */
        struct static_call_key *static_call_key;
        /* Initialised to NULL in the DEFINE_TRACE_FN expansion shown in this article. */
        void *static_call_tramp;
        /* Set to &__traceiter_<name>, the function that walks funcs[]. */
        void *iterator;
        /*
         * Filled from the _reg / _unreg arguments of DEFINE_TRACE_FN (NULL when
         * DEFINE_TRACE is used). The original note describes them as the
         * functions used when probe functions are added / removed - TODO
         * confirm the exact call sites in tracepoint.c.
         */
        int (*regfunc)(void);
        void (*unregfunc)(void);
        /*
         * All probe functions attached to this trace point. It is an array
         * (not a linked list) and __traceiter_<name> walks it until it reaches
         * an entry whose func is NULL.
         */
        struct tracepoint_func __rcu *funcs;
    };
    
    /* One entry of struct tracepoint::funcs[], i.e. one registered probe. */
    struct tracepoint_func {
        /* Probe function; __traceiter_<name> casts it to void (*)(void *, proto) and calls it. */
        void *func;
        /* Opaque pointer handed to the probe as its first argument (__data). */
        void *data;
        /* Priority passed at registration time (see register_trace_prio_<name>). */
        int prio;
    };

    static key使用见:https://www.cnblogs.com/hellokitty2/p/15026568.html

    2. DEFINE_TRACE_FN 展开后是

    /*
     * include/linux/tracepoint.h
     * Defines a struct tracepoint named __tracepoint_##_name, then defines a
     * function named __traceiter_##_name that calls every function stored in
     * the struct tracepoint::funcs[] array. The array must be terminated by an
     * entry whose func is NULL. Finally the static-call key
     * __SCK__tp_func_##_name is defined with .func pointing at the iterator.
     */
    #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)        \
        static const char __tpstrtab_##_name[]                \
        __section("__tracepoints_strings") = #_name;            \
        extern struct static_call_key __SCK__tp_func_##_name;    \
        int __traceiter_##_name(void *__data, proto);            \
        struct tracepoint __tracepoint_##_name    __used __section("__tracepoints") = {    \
            .name = __tpstrtab_##_name,                \
            .key = STATIC_KEY_INIT_FALSE,                \
            .static_call_key = &__SCK__tp_func_##_name,    \
            .static_call_tramp = NULL, \
            .iterator = &__traceiter_##_name,            \
            .regfunc = _reg,                    \
            .unregfunc = _unreg,                    \
            .funcs = NULL    \
        };                    \
        __TRACEPOINT_ENTRY(_name);                    \
        int __nocfi __traceiter_##_name(void *__data, proto)            \
        {                                \
            struct tracepoint_func *it_func_ptr;            \
            void *it_func;                        \
            it_func_ptr = rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
            if (it_func_ptr) {                    \
                do {                        \
                    it_func = (it_func_ptr)->func;        \
                    __data = (it_func_ptr)->data;        \
                    ((void(*)(void *, proto))(it_func))(__data, args); \
                } while ((++it_func_ptr)->func);        \
            }                            \
            return 0;                        \
        }                                \
        extern struct static_call_key __SCK__tp_func_##_name;  \
        extern typeof(__traceiter_##_name) __SCT__tp_func_##_name;         \
        struct static_call_key __SCK__tp_func_##_name = {      \
            .func = __traceiter_##_name,                        \
        }

    3. __DECLARE_TRACE 宏展开后就是:

    /*
     * include/linux/tracepoint.h
     * Declares the per-tracepoint symbols and defines the helper functions of
     * one tracepoint: trace_##name(), trace_##name##_rcuidle() (a dedicated
     * variant for rcuidle callers), register_trace_##name(),
     * register_trace_prio_##name() (registration with an explicit priority),
     * unregister_trace_##name(), check_trace_callback_type_##name() and
     * trace_##name##_enabled().
     *
     * The do { ... } while (0) bodies are written out inline here, so each one
     * carries its own terminating ';' - without it the statement that follows
     * (the LOCKDEP check, or the closing brace) would not parse.
     *
     * cond is evaluated inside the trace functions; when it is false they
     * return before any probe is called.
     */
    #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
        extern int __traceiter_##name(data_proto);            \
        extern struct static_call_key __SCK__tp_func_##name;        \
        extern typeof(__traceiter_##name) __SCT__tp_func_##name;    \
        extern struct tracepoint __tracepoint_##name;            \
        static inline void __nocfi trace_##name(proto)                \
        {                                \
            if (static_key_false(&__tracepoint_##name.key))        \
                do {                                \
                    struct tracepoint_func *it_func_ptr;            \
                    int __maybe_unused __idx = 0;                \
                    void *__data;                        \
                                                \
                    if (!(cond))                        \
                        return;                     \
                    /* keep srcu and sched-rcu usage consistent */        \
                    preempt_disable_notrace();                \
                    it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
                    if (it_func_ptr) {                    \
                        __data = (it_func_ptr)->data;            \
                        __traceiter_##name(data_args);            \
                    }                            \
                    preempt_enable_notrace();                \
                } while (0);    \
            if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {        \
                rcu_read_lock_sched_notrace();            \
                rcu_dereference_sched(__tracepoint_##name.funcs);\
                rcu_read_unlock_sched_notrace();        \
            }                            \
        }                                \
        static inline void trace_##name##_rcuidle(proto)        \
        {                                \
            if (static_key_false(&__tracepoint_##name.key))     \
                do {                                \
                    struct tracepoint_func *it_func_ptr;            \
                    int __maybe_unused __idx = 0;                \
                    void *__data;                        \
                                                \
                    if (!(cond))                        \
                        return;                     \
                                                \
                    /* srcu can't be used from NMI */            \
                    WARN_ON_ONCE(in_nmi());            \
                                                \
                    /* keep srcu and sched-rcu usage consistent */        \
                    preempt_disable_notrace();                \
                                                \
                    /*                            \
                     * For rcuidle callers, use srcu since sched-rcu    \
                     * doesn't work from the idle path.         \
                     */                         \
                    __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
                    rcu_irq_enter_irqson();             \
                                                \
                    it_func_ptr = rcu_dereference_raw((&__tracepoint_##name)->funcs); \
                    if (it_func_ptr) {                    \
                        __data = (it_func_ptr)->data;            \
                        __traceiter_##name(data_args);            \
                    }                            \
                                                \
                    rcu_irq_exit_irqson();                \
                    srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
                                                \
                    preempt_enable_notrace();                \
                } while (0);    \
        }    \
         static inline int register_trace_##name(void (*probe)(data_proto), void *data)    \
        {                                \
            return tracepoint_probe_register(&__tracepoint_##name, (void *)probe, data);    \
        }                                \
        static inline int register_trace_prio_##name(void (*probe)(data_proto), void *data, int prio) \
        {                                \
            return tracepoint_probe_register_prio(&__tracepoint_##name, (void *)probe, data, prio); \
        }                                \
        static inline int unregister_trace_##name(void (*probe)(data_proto), void *data)    \
        {                                \
            return tracepoint_probe_unregister(&__tracepoint_##name, (void *)probe, data);    \
        }                                \
        static inline void check_trace_callback_type_##name(void (*cb)(data_proto))    \
        {                                \
        }                                \
        static inline bool trace_##name##_enabled(void)                    \
        {                                \
            return static_key_false(&__tracepoint_##name.key);    \
        }

    trace_##name(proto) 中判断 __tracepoint_##name.key 的值为真才会调用执行各个钩子函数,在下面路径中会将这个key设置为真。

    register_trace_##name() //具体tracepoint的define位置
        tracepoint_probe_register //tracepoint.c
            tracepoint_probe_register_prio //tracepoint.c
                tracepoint_add_func //tracepoint.c
                    static_key_enable(&tp->key);

    也就是说注册了 hook 才会真,否则为假。 

    4. 使用 DECLARE_TRACE 的宏

    #define DEFINE_TRACE(name, proto, args)    DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
    
    //为空
    #define TRACE_EVENT_FLAGS(event, flag)
    
    //为空
    #define TRACE_EVENT_PERF_PERM(event, expr...)
    
    /*
     * include/linux/tracepoint-defs.h
     * Only declares the extern struct tracepoint __tracepoint_<tp>.
     * The original note advises against using it directly and says this header
     * is included at the very top of the include chain - TODO confirm.
     */
    #define DECLARE_TRACEPOINT(tp) extern struct tracepoint __tracepoint_##tp
    
    /*
     * Recommended form. It performs the same static_key_false() test as
     * trace_##name##_enabled(void), but according to the original note it is
     * safe to use from header files whereas trace_##name##_enabled(void) is
     * not - the author presumes this is because a function must not be defined
     * more than once (assumption, to be verified).
     */
    #define tracepoint_enabled(tp) static_key_false(&(__tracepoint_##tp).key)
    
    /*
     * include/linux/tracepoint.h
     * Produces the group of functions generated by __DECLARE_TRACE, including
     * register_trace_##name, trace_##name##_enabled, etc. The condition passed
     * as arg4 is cpu_online(raw_smp_processor_id()); a "void *__data" parameter
     * is prepended to proto / args for the probe-side prototypes.
     */
    #define DECLARE_TRACE(name, proto, args)                \
        __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto),    PARAMS(__data, args))
    /*
     * The only difference from DECLARE_TRACE is that arg4 is additionally
     * logically ANDed with the cond parameter. That value is used by
     * trace_##name and trace_##name##_rcuidle, which return immediately when
     * it evaluates to false.
     */
    #define DECLARE_TRACE_CONDITION(name, proto, args, cond)        \
        __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), PARAMS(void *__data, proto), PARAMS(__data, args))
    
    /* include/linux/tracepoint.h */
    #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)
    
    #define DEFINE_EVENT(template, name, proto, args)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
    
    #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
    
    #define DEFINE_EVENT_PRINT(template, name, proto, args, print)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
    
    #define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))
    
    #define TRACE_EVENT(name, proto, args, struct, assign, print)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
    
    #define TRACE_EVENT_FN(name, proto, args, struct, assign, print, reg, unreg)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
    
    #define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, assign, print, reg, unreg) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))
    
    #define TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, print) DECLARE_TRACE_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond))
    
    #define TRACE_EVENT_FLAGS(event, flag)
    
    #define TRACE_EVENT_PERF_PERM(event, expr...)
    
    #define DECLARE_EVENT_NOP(name, proto, args)                \
        static inline void trace_##name(proto)                \
        { }                                \
        static inline bool trace_##name##_enabled(void)            \
        {                                \
            return false;                        \
        }
    
    #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)    DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
    
    #define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)
    
    #define DEFINE_EVENT_NOP(template, name, proto, args)    DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))

    tracepoint.h 中的定义可能不是最终的,因为有的文件中会先执行 #undef XXX,然后重新进行 define。观察可以发现,这些宏主要使用的是 DECLARE_TRACE,对照展开后的函数,显然是不完整的,因为 DEFINE_TRACE 相关的部分没有。因此每个trace应该还存在对 DEFINE_TRACE 进行使用的一部分。两者都存在,一个trace才算完整。


    5. 使用 DEFINE_TRACE 的部分

    /* include/trace/define_trace.h */
    #undef TRACE_EVENT
    #define TRACE_EVENT(name, proto, args, tstruct, assign, print)    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
    
    #undef TRACE_EVENT_CONDITION
    #define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
        TRACE_EVENT(name, PARAMS(proto), PARAMS(args), PARAMS(tstruct), PARAMS(assign),    PARAMS(print))
    
    #undef TRACE_EVENT_FN
    #define TRACE_EVENT_FN(name, proto, args, tstruct, assign, print, reg, unreg)    \
        DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))
    
    #undef TRACE_EVENT_FN_COND
    #define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, assign, print, reg, unreg)    \
        DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))
    
    #undef TRACE_EVENT_NOP
    #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print)
    
    #undef DEFINE_EVENT_NOP
    #define DEFINE_EVENT_NOP(template, name, proto, args)
    
    #undef DEFINE_EVENT
    #define DEFINE_EVENT(template, name, proto, args) DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
    
    #undef DEFINE_EVENT_FN
    #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \
        DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))
    
    #undef DEFINE_EVENT_PRINT
    #define DEFINE_EVENT_PRINT(template, name, proto, args, print)    \
        DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
    
    #undef DEFINE_EVENT_CONDITION
    #define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
        DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
    
    #undef DECLARE_TRACE
    #define DECLARE_TRACE(name, proto, args)    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))

    6. EXPORT_TRACEPOINT_SYMBOL_GPL 和 EXPORT_TRACEPOINT_SYMBOL

    导出这些trace符号后,才能在模块中使用。

    /*
     * include/linux/tracepoint.h
     * 展开后就是
     */
    #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)                \
        EXPORT_SYMBOL_GPL(__tracepoint_##name);                \
        EXPORT_SYMBOL_GPL(__traceiter_##name);                \
        EXPORT_SYMBOL_GPL(__SCK__tp_func_##name);
    
    #define EXPORT_TRACEPOINT_SYMBOL(name)                    \
        EXPORT_SYMBOL(__tracepoint_##name);                \
        EXPORT_SYMBOL(__traceiter_##name);                \
        EXPORT_SYMBOL(__SCK__tp_func_##name)

    7. 定义一个trace,TRACE_EVENT 各个成员使用的宏

    /* include/linux/tracepoint.h */
    #define PARAMS(args...) args
    #define TP_PROTO(args...)    args
    #define TP_ARGS(args...)    args
    #define TP_CONDITION(args...)    args
    //include/trace/trace_events.h
    #define TP_STRUCT__entry(args...) args
    #define TP_fast_assign(args...) args
    #define TP_printk(fmt, args...) "\"" fmt "\", "  __stringify(args)

    include/trace/events/sched.h 文件中定义了大量的CPU调度相关的trace,但是它只include了 linux/tracepoint.h 文件,说明其使用的宏全部都是来自linux/tracepoint.h 文件的,但是 tracepoint.h 中又包含了其它头文件,不排除其它头文件中又包含了其它头文件,比如 include/trace/trace_events.h 。

    8. 以 sched_migrate_task 为例来看 TRACE_EVENT

    //include/trace/events/sched.h
    TRACE_EVENT(sched_migrate_task,
    
        TP_PROTO(struct task_struct *p, int dest_cpu),
    
        TP_ARGS(p, dest_cpu),
    
        TP_STRUCT__entry(
            __array(    char,    comm,    TASK_COMM_LEN    )
            __field(    pid_t,    pid            )
            __field(    int,    prio            )
            __field(    int,    orig_cpu        )
            __field(    int,    dest_cpu        )
            __field(    int,    running            )
        ),
    
        TP_fast_assign(
            memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
            __entry->pid        = p->pid;
            __entry->prio        = p->prio; /* XXX SCHED_DEADLINE */
            __entry->orig_cpu    = task_cpu(p);
            __entry->dest_cpu    = dest_cpu;
            __entry->running    = (p->state == TASK_RUNNING);
        ),
    
        TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d running=%d",
              __entry->comm, __entry->pid, __entry->prio,
              __entry->orig_cpu, __entry->dest_cpu,
              __entry->running)
    );

    include/linux/tracepoint.h 中有注释:__field(pid_t, prev_pid) 等于 pid_t prev_pid; __array(char, prev_comm, TASK_COMM_LEN) 等于 char prev_comm[TASK_COMM_LEN];
    声明的 'local variable' 叫做 '__entry',可以在 TP_fast_assign 中使用 __entry->XX 来引用。TP_STRUCT__entry 指定环形缓冲区中的存储格式,也是 /sys/kernel/debug/tracing/events/<*>/format 导出到用户空间的格式。

    按照如下宏定义进行展开:

    #define TRACE_EVENT(name, proto, args, struct, assign, print)    DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
    #define DECLARE_TRACE(name, proto, args)                \
        __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto),    PARAMS(__data, args))
    //直接映射也就是:
    #define TRACE_EVENT(name, proto, args, struct, assign, print) \
        __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), cpu_online(raw_smp_processor_id()), PARAMS(void *__data, proto),    PARAMS(__data, args))
    
    
    #define TRACE_EVENT(name, proto, args, struct, assign, print)    DEFINE_TRACE(name, PARAMS(proto), PARAMS(args))
    #define DEFINE_TRACE(name, proto, args)        DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
    //直接映射也就是:
    #define TRACE_EVENT(name, proto, args, struct, assign, print)    DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

    全部展开后为:

    /*
     * __DECLARE_TRACE written out for sched_migrate_task: name is replaced by
     * sched_migrate_task, proto by (struct task_struct *p, int dest_cpu) and
     * cond by cpu_online(raw_smp_processor_id()).
     * Each inlined do { ... } while (0) body is terminated with ';' so that the
     * statement following it parses.
     */
    #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
        extern int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu);            \
        extern struct static_call_key __SCK__tp_func_sched_migrate_task;        \
        extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task;    \
        extern struct tracepoint __tracepoint_sched_migrate_task;            \
        static inline void __nocfi trace_sched_migrate_task(struct task_struct *p, int dest_cpu)                \
        {                                \
            if (static_key_false(&__tracepoint_sched_migrate_task.key))        \
                do {                                \
                    struct tracepoint_func *it_func_ptr;            \
                    int __maybe_unused __idx = 0;                \
                    void *__data;                        \
                                                \
                    if (!cpu_online(raw_smp_processor_id()))                        \
                        return;                     \
                    /* keep srcu and sched-rcu usage consistent */        \
                    preempt_disable_notrace();                \
                    it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
                    if (it_func_ptr) {                    \
                        __data = (it_func_ptr)->data;            \
                        __traceiter_sched_migrate_task(__data, p, dest_cpu);            \
                    }                            \
                    preempt_enable_notrace();                \
                } while (0);    \
            if (IS_ENABLED(CONFIG_LOCKDEP) && cpu_online(raw_smp_processor_id())) {        \
                rcu_read_lock_sched_notrace();            \
                rcu_dereference_sched(__tracepoint_sched_migrate_task.funcs);\
                rcu_read_unlock_sched_notrace();        \
            }                            \
        }                                \
        static inline void trace_sched_migrate_task_rcuidle(struct task_struct *p, int dest_cpu)        \
        {                                \
            if (static_key_false(&__tracepoint_sched_migrate_task.key))     \
                do {                                \
                    struct tracepoint_func *it_func_ptr;            \
                    int __maybe_unused __idx = 0;                \
                    void *__data;                        \
                                                \
                    if (!cpu_online(raw_smp_processor_id()))                        \
                        return;                     \
                                                \
                    /* srcu can't be used from NMI */            \
                    WARN_ON_ONCE(in_nmi());            \
                                                \
                    /* keep srcu and sched-rcu usage consistent */        \
                    preempt_disable_notrace();                \
                                                \
                    /*                            \
                     * For rcuidle callers, use srcu since sched-rcu    \
                     * doesn't work from the idle path.         \
                     */                         \
                    __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
                    rcu_irq_enter_irqson();             \
                                                \
                    it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
                    if (it_func_ptr) {                    \
                        __data = (it_func_ptr)->data;            \
                        __traceiter_sched_migrate_task(__data, p, dest_cpu);            \
                    }                            \
                                                \
                    rcu_irq_exit_irqson();                \
                    srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
                                                \
                    preempt_enable_notrace();                \
                } while (0);    \
        }    \
         static inline int register_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data)    \
        {                                \
            return tracepoint_probe_register(&__tracepoint_sched_migrate_task, (void *)probe, data);    \
        }                                \
        static inline int register_trace_prio_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data, int prio) \
        {                                \
            return tracepoint_probe_register_prio(&__tracepoint_sched_migrate_task, (void *)probe, data, prio); \
        }                                \
        static inline int unregister_trace_sched_migrate_task(void (*probe)(void *__data, struct task_struct *p, int dest_cpu), void *data)    \
        {                                \
            return tracepoint_probe_unregister(&__tracepoint_sched_migrate_task, (void *)probe, data);    \
        }                                \
        static inline void check_trace_callback_type_sched_migrate_task(void (*cb)(void *__data, struct task_struct *p, int dest_cpu))    \
        {                                \
        }                                \
        static inline bool trace_sched_migrate_task_enabled(void)                    \
        {                                \
            return static_key_false(&__tracepoint_sched_migrate_task.key);    \
        }
    
    #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)        \
            static const char __tpstrtab_sched_migrate_task[]                \
            __section("__tracepoints_strings") = "sched_migrate_task";            \
            extern struct static_call_key __SCK__tp_func_sched_migrate_task;    \
            int __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu);            \
            struct tracepoint __tracepoint_sched_migrate_task    __used __section("__tracepoints") = {    \
                .name = __tpstrtab_sched_migrate_task,             \
                .key = STATIC_KEY_INIT_FALSE,                \
                .static_call_key = &__SCK__tp_func_sched_migrate_task, \
                .static_call_tramp = NULL, \
                .iterator = &__traceiter_sched_migrate_task,            \
                .regfunc = NULL,                    \
                .unregfunc = NULL,                    \
                .funcs = NULL    \
            };                    \
            __TRACEPOINT_ENTRY(sched_migrate_task);                    \
            int __nocfi __traceiter_sched_migrate_task(void *__data, struct task_struct *p, int dest_cpu)            \
            {                                \
                struct tracepoint_func *it_func_ptr;            \
                void *it_func;                        \
                it_func_ptr = rcu_dereference_raw((&__tracepoint_sched_migrate_task)->funcs); \
                if (it_func_ptr) {                    \
                    do {                        \
                        it_func = (it_func_ptr)->func;        \
                        __data = (it_func_ptr)->data;        \
                        ((void(*)(void *, struct task_struct *p, int dest_cpu))(it_func))(__data, p, dest_cpu); \
                    } while ((++it_func_ptr)->func);        \
                }                            \
                return 0;                        \
            }                                \
            extern struct static_call_key __SCK__tp_func_sched_migrate_task;  \
            extern typeof(__traceiter_sched_migrate_task) __SCT__tp_func_sched_migrate_task;        \
            struct static_call_key __SCK__tp_func_sched_migrate_task = {      \
                .func = __traceiter_sched_migrate_task,                        \
            }

    TODO: 其它部分是怎么起作用的?

    从展开后的内容可以看到,当调用 trace_sched_migrate_task() 进行trace的时候,会调用 __traceiter_sched_migrate_task() 来遍历 struct tracepoint::funcs 数组中的每一个函数进行trace,也就是说一个trace上可以注册多个hook函数

    若使用 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_migrate_task) 导出,上面加黑加粗的 __tracepoint_sched_migrate_task __traceiter_sched_migrate_task __SCK__tp_func_sched_migrate_task 三个符号会被导出来。

    9. 一个trace上注册多个hook

    既然一个trace上可以注册多个hook,那么一定会涉及到这些hook函数的调用次序的问题,见 tracepoint_probe_register 实现可知,有一个默认优先级 TRACEPOINT_DEFAULT_PRIO=10,注册函数中会传递给 struct tracepoint_func::prio,在插入到 struct tracepoint::funcs 数组时会判断优先级,优先级数值越大,越插在靠前的位置,相同优先级的话,后注册的插在后面。 比如此例子中,注册默认优先级的使用函数 register_trace_sched_migrate_task,自己指定优先级使用函数 register_trace_prio_sched_migrate_task。

    /*
     * Register @probe (with its private @data) on tracepoint @tp using the
     * default priority TRACEPOINT_DEFAULT_PRIO. Returns whatever
     * tracepoint_probe_register_prio() returns.
     */
    int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
    {
        int ret;

        ret = tracepoint_probe_register_prio(tp, probe, data,
                                             TRACEPOINT_DEFAULT_PRIO);
        return ret;
    }
    EXPORT_SYMBOL_GPL(tracepoint_probe_register);

    二、Google搞的vendor hook

    1. hook 的 DEFINE_HOOK_FN 解析后是

    //include/trace/hooks/vendor_hooks.h
    /*
     * Vendor-hook counterpart of DEFINE_TRACE_FN: defines __tracepoint_##_name,
     * the iterator __traceiter_##_name and the static-call key
     * __SCK__tp_func_##_name.
     *
     * The notes inside the iterator use block comments on purpose: a "//"
     * comment on a line that ends with a backslash is spliced with the next
     * line before comments are stripped, so it would swallow the rest of the
     * macro.
     */
    #define DEFINE_HOOK_FN(_name, _reg, _unreg, proto, args)        \
        static const char __tpstrtab_##_name[]                \
        __section("__tracepoints_strings") = #_name;            \
        extern struct static_call_key __SCK__tp_func_##_name;    \
        int __traceiter_##_name(void *__data, proto);            \
        struct tracepoint __tracepoint_##_name    __used __section("__tracepoints") = {    \
            .name = __tpstrtab_##_name,             \
            .key = STATIC_KEY_INIT_FALSE,                \
            .static_call_key = &__SCK__tp_func_##_name,    \
            .static_call_tramp = NULL,    \
            .iterator = &__traceiter_##_name,            \
            .regfunc = _reg,                    \
            .unregfunc = _unreg,                    \
            .funcs = NULL };                    \
        __TRACEPOINT_ENTRY(_name);                    \
        int __nocfi __traceiter_##_name(void *__data, proto)            \
        {                                \
            struct tracepoint_func *it_func_ptr;            \
            void *it_func;                        \
                                        \
            /* Difference from ftrace: funcs is read directly here, */    \
            /* DEFINE_TRACE_FN goes through rcu_dereference_raw().  */    \
            it_func_ptr = (&__tracepoint_##_name)->funcs;        \
            /* Difference from ftrace: the first func is fetched up front, */    \
            /* DEFINE_TRACE_FN tests it_func_ptr for NULL first.           */    \
            it_func = (it_func_ptr)->func;                \
            do {                            \
                __data = (it_func_ptr)->data;            \
                ((void(*)(void *, proto))(it_func))(__data, args); \
                it_func = READ_ONCE((++it_func_ptr)->func); \
            } while (it_func);    \
            return 0;                        \
        }                                   \
        extern struct static_call_key __SCK__tp_func_##_name;  \
        extern typeof(__traceiter_##_name) __SCT__tp_func_##_name;         \
        struct static_call_key __SCK__tp_func_##_name = {      \
            .func = __traceiter_##_name,                        \
        }

    注意备注上的一些和ftrace之间的不同点。

    2. hook 的 __DECLARE_HOOK 解析后是:

    //include/trace/hooks/vendor_hooks.h
    /*
     * Vendor-hook counterpart of __DECLARE_TRACE. It declares the per-hook
     * symbols and defines trace_##name(), trace_##name##_enabled() and
     * register_trace_##name(), which registers through
     * android_rvh_probe_register(). No unregister helper is generated.
     *
     * Every line of the macro ends with a continuation backslash (a missing one
     * would cut the macro short), and the inlined do { ... } while (0) body is
     * terminated with ';'.
     */
    #define __DECLARE_HOOK(name, proto, args, cond, data_proto, data_args)    \
        extern int __traceiter_##name(data_proto);            \
        extern struct static_call_key __SCK__tp_func_##name;        \
        extern typeof(__traceiter_##name) __SCT__tp_func_##name;    \
        extern struct tracepoint __tracepoint_##name;            \
                                                                \
        static inline void __nocfi trace_##name(proto)            \
        {                                \
            if (static_key_false(&__tracepoint_##name.key))     \
                do {                                \
                    struct tracepoint_func *it_func_ptr;            \
                    void *__data;                        \
                                                \
                    if (!(cond))                        \
                        return;                        \
                                                \
                    it_func_ptr = (&__tracepoint_##name)->funcs;        \
                    if (it_func_ptr) {                    \
                        __data = (it_func_ptr)->data;            \
                        __traceiter_##name(data_args);     \
                    }                            \
                } while (0);    \
        }                                \
        static inline bool trace_##name##_enabled(void)        \
        {                                \
            return static_key_false(&__tracepoint_##name.key);    \
        }                                \
        static inline int register_trace_##name(void (*probe)(data_proto), void *data)    \
        {                                \
            return android_rvh_probe_register(&__tracepoint_##name, (void *)probe, data);    \
        }                                \
        /* vendor hooks cannot be unregistered */

    相比于ftrace,hook的trace 删除了 trace_##name##_rcuidle()、register_trace_prio_##name()、unregister_trace_##name()、check_trace_callback_type_##name()。

    3. 其它宏

    #undef DECLARE_RESTRICTED_HOOK
    #define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \
        DEFINE_HOOK_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args))
    
    #undef DECLARE_RESTRICTED_HOOK
    #define DECLARE_RESTRICTED_HOOK(name, proto, args, cond) \
        __DECLARE_HOOK(name, PARAMS(proto), PARAMS(args), cond, PARAMS(void *__data, proto),PARAMS(__data, args))

    4. 总结

    Google的vendor hook在ftrace的基础上做了改动,由于Google的Hook宏删除了ftrace中的 register_trace_prio_##name(),因此不能注册带有优先级的钩子函数了。

    三、实验

    1. 对5.10内核中的 util_est_update 中的trace添加hook

    /*
     * Excerpt only (elided parts are marked "..."): shows where the two trace
     * calls used in the experiment sit inside util_est_update().
     */
    static inline void util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) //fair.c
    {
        ...
        // Google's vendor hook; if ret is non-zero afterwards the function returns early
        trace_android_rvh_util_est_update(cfs_rq, p, task_sleep, &ret);
        if (ret)
            return;
        ...
        // ordinary ftrace tracepoint
        trace_sched_util_est_se_tp(&p->se);
    }

    这两个trace符号Google已经导出来了:

    /*
     * Export statements for the two tracepoints used in the experiment; the
     * trailing comment on each line names the file the statement is quoted from.
     */
    EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_util_est_update); //vendor_hooks.c
    EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp); //core.c

    2. 实验代码

    /* 1. 包含头文件 */
    #include <trace/events/sched.h>
    
    
    /* 2. 实现handler钩子函数,参数要与 trace_##name() 的参数相同,并且最前面多一个 void *data 参数 */
    //util_est_update() //fair.c
    /*
     * Probe for the android_rvh_util_est_update vendor hook.
     *
     * @data:       private pointer given at registration time (unused here)
     * @cfs_rq:     cfs runqueue whose util_est values are logged
     * @p:          task whose util_est values are logged
     * @task_sleep: flag forwarded by the hook call site, logged as 0/1
     * @ret_o:      out-parameter read by the call site; always written as 0
     *
     * Logs one "first_register" line through trace_printk().  The util_est
     * members are unsigned int, so they are printed with %u rather than %d
     * to avoid a large value showing up as a negative number.
     */
    void android_rvh_util_est_update_handler(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
    {
        const struct util_est *se_ue = &p->se.avg.util_est;
        const struct util_est *rq_ue = &cfs_rq->avg.util_est;

        trace_printk("start: first_register: se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u, task_sleep=%d\n",
                se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma, task_sleep);

        /* 0 means "do not short-circuit": the call site returns early only on non-zero. */
        *ret_o = 0;
    }
    
    /*
     * Second probe for the android_rvh_util_est_update vendor hook; identical
     * to the first one except for the "second_register" tag in the log line,
     * so the trace shows the order in which the probes are invoked.
     *
     * @data:       private pointer given at registration time (unused here)
     * @cfs_rq:     cfs runqueue whose util_est values are logged
     * @p:          task whose util_est values are logged
     * @task_sleep: flag forwarded by the hook call site, logged as 0/1
     * @ret_o:      out-parameter read by the call site; always written as 0
     *
     * The util_est members are unsigned int and are therefore printed with %u.
     */
    void android_rvh_util_est_update_handler_second(void *data, struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep, int *ret_o)
    {
        const struct util_est *se_ue = &p->se.avg.util_est;
        const struct util_est *rq_ue = &cfs_rq->avg.util_est;

        trace_printk("start: second_register: se_ue->enqueued=%u, se_ue->ewma=%u, rq_ue->enqueued=%u, rq_ue->ewma=%u, task_sleep=%d\n",
                se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma, task_sleep);

        /* 0 means "do not short-circuit": the call site returns early only on non-zero. */
        *ret_o = 0;
    }
    
    //只改变这一个debug优先级, 默认优先级是10
    void sched_util_est_se_tp_handler(void *data, struct sched_entity *se)
    {
        static int count = 0;
        int prio = 10;
    
        if (entity_is_task(se)) {
            struct task_struct *p = container_of(se, struct task_struct, se);
            struct rq *rq = cpu_rq(task_cpu(p));
            struct cfs_rq *cfs_rq = &rq->cfs;
            struct util_est *se_ue = &p->se.avg.util_est;
            struct util_est *rq_ue = &cfs_rq->avg.util_est;
            trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d\n",
                    count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma);
        } else {
            trace_printk("end: se is not task\n");
        }
    }
    
    void sched_util_est_se_tp_handler_prio_12(void *data, struct sched_entity *se)
    {
        static int count = 0;
        int prio = 12;
    
        if (entity_is_task(se)) {
            struct task_struct *p = container_of(se, struct task_struct, se);
            struct rq *rq = cpu_rq(task_cpu(p));
            struct cfs_rq *cfs_rq = &rq->cfs;
            struct util_est *se_ue = &p->se.avg.util_est;
            struct util_est *rq_ue = &cfs_rq->avg.util_est;
            trace_printk("end: count=%d, prio=%d, se_ue->enqueued=%d, se_ue->ewma=%d, rq_ue->enqueued=%d, rq_ue->ewma=%d\n",
                    count++, prio, se_ue->enqueued, se_ue->ewma, rq_ue->enqueued, rq_ue->ewma);
        } else {
            trace_printk("end: se is not task\n");
        }
    }
    
    
    /*
     * 3. Register the handlers.
     * These are free-standing statements; presumably they belong inside the
     * module's init function -- the enclosing function is not shown.
     * register_trace_<name>() returns an int, which is discarded here; real
     * code should check it.
     */
    // plain registration, no explicit priority
    register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler, NULL);
    register_trace_sched_util_est_se_tp(sched_util_est_se_tp_handler, NULL);
    // vendor hooks have no register_trace_prio_<name>() variant, so the second probe is registered the same way
    register_trace_android_rvh_util_est_update(android_rvh_util_est_update_handler_second, NULL);
    // ordinary ftrace tracepoint: register a second probe with explicit priority 12
    register_trace_prio_sched_util_est_se_tp(sched_util_est_se_tp_handler_prio_12, NULL, 12);

    3. 实验结果,打印的前后关系:

    # echo 1 > tracing_on
    # cat trace_pipe
    <...>-338     [005] d..3    32.158404: sched_util_est_se_tp_handler_prio_12: end: count=28494, prio=12, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0
    <...>-338     [005] d..3    32.158404: sched_util_est_se_tp_handler: end: count=28493, prio=10, se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0
    
    <...>-338     [005] d..2    32.158410: android_rvh_util_est_update_handler: start: first_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1
    <...>-338     [005] d..2    32.158410: android_rvh_util_est_update_handler_second: start: second_register: se_ue->enqueued=39, se_ue->ewma=48, rq_ue->enqueued=87, rq_ue->ewma=0, task_sleep=1

    普通ftrace,注册时指定的优先级数值越大,越先调用。vendor hook 没有带有优先级注册的钩子函数,先注册的钩子函数调用在前,后注册的钩子函数调用在后。

    看代码实现,就算是不执行 “echo 1 > tracing_on” 这些钩子函数应该也会被调用执行,只不过不会打印出来。

    4. 另一种注册trace hook的方法

    /* One row of the name-based registration table: ties a tracepoint name to the probe to attach. */
    struct tracepoints_table {
        const char *name;       /* tracepoint name, compared against tracepoint->name during lookup */
        void *func;             /* probe passed to tracepoint_probe_register()/unregister() */
        struct tracepoint *tp;  /* filled in by lookup_tracepoints() when the name matches */
        bool registered;        /* set by register_tracepoints_table(), cleared by tracepoint_cleanup() */
    };
    
    /*
     * Probes to attach by tracepoint name.  Only .name and .func are given;
     * .tp and .registered are omitted and therefore start out as NULL / false.
     */
    static struct tracepoints_table g_tracepoints_table[] = {
        {.name = "android_rvh_util_est_update", .func = android_rvh_util_est_update_handler},
        {.name = "sched_util_est_se_tp", .func = sched_util_est_se_tp_handler},
    };
    
    static void lookup_tracepoints(struct tracepoint *tp, void *ignore)
    {
        int i;
    
        for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) {
            if (!strcmp(g_tracepoints_table[i].name, tp->name))
                g_tracepoints_table[i].tp = tp;
        }
    }
    
    /* Defined further down in this file; declared here because it is called before its definition. */
    void tracepoint_cleanup(int index);

    /*
     * Resolve every row of g_tracepoints_table to its struct tracepoint and
     * attach the row's probe to it.
     *
     * Rows whose name matched no kernel tracepoint (tp still NULL) are skipped.
     * Registration is all-or-nothing: when one probe fails to register, the
     * probes registered so far are rolled back and the function stops.  A row
     * is flagged `registered` only after its probe was attached successfully,
     * so a later cleanup never tries to unregister a probe that was never
     * attached.
     *
     * NOTE(review): the table contains an android_rvh_* restricted vendor hook,
     * and the hook header states that vendor hooks cannot be unregistered --
     * confirm that the generic register/unregister path is appropriate for it.
     */
    static void register_tracepoints_table(void)
    {
        int i, ret;
        struct tracepoints_table *tt;

        for_each_kernel_tracepoint(lookup_tracepoints, NULL); /* find the matching struct tracepoint for each row */
        for (i = 0; i < ARRAY_SIZE(g_tracepoints_table); i++) {
            tt = &g_tracepoints_table[i];
            if (!tt->tp)
                continue;

            ret = tracepoint_probe_register(tt->tp, tt->func, NULL);
            if (ret) {
                /* %ps prints the symbol name of the probe function */
                pr_info("couldn't activate tracepoint %ps\n", tt->func);
                tracepoint_cleanup(i);
                return;
            }
            tt->registered = true;
        }
    }
    
    void tracepoint_cleanup(int index)
    {
        int i;
        struct tracepoints_table *tt;
    
        for (i = 0; i < index; i++) { 
            tracepoints_table *tt = &g_tracepoints_table[i];
            if (tt->registered) {
                tracepoint_probe_unregister(tt->tp, tt->func, NULL);
                tt->registered = false;
            }
        }
    }

    可见这种注册需要遍历 tracepoint 区域并逐个对 name 进行对比,效率比较低,优点是涉及的文件比较少。

  • 相关阅读:
    查看tls指纹
    并行流
    方法引入2
    方法引入
    Optional.ofNullable
    stream.filter
    stream.skip limit
    反射
    Optional orElseGet
    nginx 预压缩(gzip)
  • 原文地址:https://www.cnblogs.com/hellokitty2/p/15522289.html
Copyright © 2011-2022 走看看