zoukankan      html  css  js  c++  java
  • Linux内核kprobe机制实现浅析

       Kprobe机制是内核提供的一种调试机制,它提供了一种方法,能够在不修改现有代码的基础上,灵活地跟踪内核函数的执行。它的基本工作原理是:用户指定一个探测点,并把一个用户定义的处理函数关联到该探测点,当内核执行到该探测点时,相应的关联函数被执行,然后继续执行正常的代码路径。
         Kprobe提供了三种形式的探测点,一种是最基本的kprobe,能够在指定代码执行前、执行后进行探测,但此时不能访问被探测函数内的相关变量信息;一种是jprobe,用于探测某一函数的入口,并且能够访问对应的函数参数;一种是kretprobe,用于完成指定函数返回值的探测功能。其中最基本的就是kprobe机制,jprobe以及kretprobe的实现都依赖于kprobe,但其代码的实现都很巧妙,强烈建议每一个内核爱好者阅读。
        
        好了,闲话少叙,开始上代码:
      首先是struct kprobe结构,每一个探测点的基本结构

    点击(此处)折叠或打开

    1. struct kprobe {
    2.     /*用于保存kprobe的全局hash表,以被探测的addr为key*/
    3.     struct hlist_node hlist;

    4.     /* list of kprobes for multi-handler support */
    5.     /*当对同一个探测点存在多个探测函数时,所有的函数挂在这条链上*/
    6.     struct list_head list;

    7.     /*count the number of times this probe was temporarily disarmed */
    8.     unsigned long nmissed;

    9.     /* location of the probe point */
    10.     /*被探测的目标地址*/
    11.     kprobe_opcode_t *addr;

    12.     /* Allow user to indicate symbol name of the probe point */
    13.     /*symbol_name的存在,允许用户指定函数名而非确定的地址*/
    14.     const char *symbol_name;

    15.     /* Offset into the symbol */
    16.     /*如果被探测点为函数内部某个指令,需要使用addr + offset的方式*/
    17.     unsigned int offset;

    18.     /* Called before addr is executed. */
    19.     /*探测函数,在目标探测点执行之前调用*/
    20.     kprobe_pre_handler_t pre_handler;

    21.     /* Called after addr is executed, unless... */
    22.     /*探测函数,在目标探测点执行之后调用*/
    23.     kprobe_post_handler_t post_handler;

    24.     /*
    25.      * ... called if executing addr causes a fault (eg. page fault).
    26.      * Return 1 if it handled fault, otherwise kernel will see it.
    27.      */
    28.     kprobe_fault_handler_t fault_handler;

    29.     /*
    30.      * ... called if breakpoint trap occurs in probe handler.
    31.      * Return 1 if it handled break, otherwise kernel will see it.
    32.      */
    33.     kprobe_break_handler_t break_handler;

    34.     /*opcode 以及 ainsn 用于保存被替换的指令码*/
    35.     
    36.     /* Saved opcode (which has been replaced with breakpoint) */
    37.     kprobe_opcode_t opcode;

    38.     /* copy of the original instruction */
    39.     struct arch_specific_insn ainsn;

    40.     /*
    41.      * Indicates various status flags.
    42.      * Protected by kprobe_mutex after this kprobe is registered.
    43.      */
    44.     u32 flags;
    45. };
        对于kprobe功能的实现主要利用了内核中的两个功能特性:异常(尤其是int 3),单步执行(EFLAGS中的TF标志)。
        大概的流程:
     1)在注册探测点的时候,对被探测函数的指令码进行替换,替换为int 3的指令码;
     2)在执行int 3的异常执行中,通过通知链的方式调用kprobe的异常处理函数;
     3)在kprobe的异常处理函数中,判断是否存在pre_handler钩子,存在则执行;
     4)执行完后,准备进入单步调试,通过设置EFLAGS中的TF标志位,并且把异常返回的地址修改为保存原指令码副本的地址;
     5)代码返回,执行原有指令,执行结束后触发单步异常;
     6)在单步异常的处理中,清除单步标志,执行post_handler流程,并最终返回;

        下面又进入代码时间,首先看一下kprobe模块的初始化代码,初始化代码主要做了两件事:标记出哪些代码是不能被探测的,这些代码属于kprobe实现的关键代码;注册通知链到die_notifier,用于接收异常通知。

    点击(此处)折叠或打开

    1. 初始化代码位于kernel/kprobes.c中
    2. static int __init init_kprobes(void)
    3. {
    4.     int i, err = 0;
    5.         ....

    6.      /*kprobe_blacklist中保存的是kprobe实现的关键代码路径,这些函数不应该被kprobe探测*/
    7.     /*
    8.      * Lookup and populate the kprobe_blacklist.
    9.      *
    10.      * Unlike the kretprobe blacklist, we'll need to determine
    11.      * the range of addresses that belong to the said functions,
    12.      * since a kprobe need not necessarily be at the beginning
    13.      * of a function.
    14.      */
    15.     for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
    16.         kprobe_lookup_name(kb->name, addr);
    17.         if (!addr)
    18.             continue;

    19.         kb->start_addr = (unsigned long)addr;
    20.         symbol_name = kallsyms_lookup(kb->start_addr,
    21.                 &size, &offset, &modname, namebuf);
    22.         if (!symbol_name)
    23.             kb->range = 0;
    24.         else
    25.             kb->range = size;
    26.     }
    27.         ....
    28.     if (!err)
    29.         /*注册通知链到die_notifier,用于接收int 3的异常信息*/
    30.         err = register_die_notifier(&kprobe_exceptions_nb);
    31.          ....
    32. }
    33. 其中的通知链:
    34. static struct notifier_block kprobe_exceptions_nb = {
    35.     .notifier_call = kprobe_exceptions_notify,
    36.     /*优先级最高,保证最先执行*/
    37.     .priority = 0x7fffffff /* we need to be notified first */
    38. };
        kprobe的注册流程register_kprobe。

    点击(此处)折叠或打开

    1. int __kprobes register_kprobe(struct kprobe *p)
    2. {
    3.     int ret = 0;
    4.     struct kprobe *old_p;
    5.     struct module *probed_mod;
    6.     kprobe_opcode_t *addr;

    7.     /*获取被探测点的地址,指定了symbol_name,则从kallsyms中获取;指定了offset,则返回addr + offset*/
    8.     addr = kprobe_addr(p);
    9.     if (!addr)
    10.         return -EINVAL;
    11.     p->addr = addr;

    12.     /*判断同一个kprobe是否被重复注册*/
    13.     ret = check_kprobe_rereg(p);
    14.     if (ret)
    15.         return ret;

    16.     jump_label_lock();
    17.     preempt_disable();
    18.     /*判断被注册的函数是否位于内核的代码段内,或位于不能探测的kprobe实现路径中*/
    19.     if (!kernel_text_address((unsigned long) p->addr) ||
    20.      in_kprobes_functions((unsigned long) p->addr) ||
    21.      ftrace_text_reserved(p->addr, p->addr) ||
    22.      jump_label_text_reserved(p->addr, p->addr))
    23.         goto fail_with_jump_label;

    24.     /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
    25.     p->flags &= KPROBE_FLAG_DISABLED;

    26.     /*
    27.      * Check if are we probing a module.
    28.      */
    29.     /*判断被探测的地址是否属于某一个模块,并且位于模块的text section内*/
    30.     probed_mod = __module_text_address((unsigned long) p->addr);
    31.     if (probed_mod) {
    32.         /*如果被探测的为模块地址,首先要增加模块的引用计数*/
    33.         /*
    34.          * We must hold a refcount of the probed module while updating
    35.          * its code to prohibit unexpected unloading.
    36.          */
    37.         if (unlikely(!try_module_get(probed_mod)))
    38.             goto fail_with_jump_label;

    39.         /*
    40.          * If the module freed .init.text, we couldn't insert
    41.          * kprobes in there.
    42.          */
    43.         /*如果被探测的地址位于模块的init地址段内,但该段代码区间已被释放,则直接退出*/
    44.         if (within_module_init((unsigned long)p->addr, probed_mod) &&
    45.          probed_mod->state != MODULE_STATE_COMING) {
    46.             module_put(probed_mod);
    47.             goto fail_with_jump_label;
    48.         }
    49.     }
    50.     preempt_enable();
    51.     jump_label_unlock();

    52.     p->nmissed = 0;
    53.     INIT_LIST_HEAD(&p->list);
    54.     mutex_lock(&kprobe_mutex);

    55.     jump_label_lock(); /* needed to call jump_label_text_reserved() */

    56.     get_online_cpus();    /* For avoiding text_mutex deadlock. */
    57.     mutex_lock(&text_mutex);

    58.     /*判断在同一个探测点是否已经注册了其他的探测函数*/
    59.     old_p = get_kprobe(p->addr);
    60.     if (old_p) {
    61.         /* Since this may unoptimize old_p, locking text_mutex. */
    62.         /*如果已经存在注册过的kprobe,则将探测点的函数修改为aggr_pre_handler,并将所有的handler挂载到其链表上,由其负责所有handler函数的执行*/
    63.         ret = register_aggr_kprobe(old_p, p);
    64.         goto out;
    65.     }

    66.     /* 分配特定的内存地址用于保存原有的指令
    67.     * 按照内核注释,在x86上被分配的地址必须位于特殊的可执行页内(must be on special executable page on x86).
    68.      * 该地址被保存在kprobe->ainsn.insn
    69.      */
    70.     ret = arch_prepare_kprobe(p);
    71.     if (ret)
    72.         goto out;

    73.     /*将kprobe加入到相应的hash表内*/
    74.     INIT_HLIST_NODE(&p->hlist);
    75.     hlist_add_head_rcu(&p->hlist,
    76.          &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

    77.     if (!kprobes_all_disarmed && !kprobe_disabled(p))
    78. /*将探测点的指令码修改为int 3指令*/
    79.         __arm_kprobe(p);

    80.     /* Try to optimize kprobe */
    81.     try_to_optimize_kprobe(p);

    82. out:
    83.     mutex_unlock(&text_mutex);
    84.     put_online_cpus();
    85.     jump_label_unlock();
    86.     mutex_unlock(&kprobe_mutex);

    87.     if (probed_mod)
    88.         module_put(probed_mod);

    89.     return ret;

    90. fail_with_jump_label:
    91.     preempt_enable();
    92.     jump_label_unlock();
    93.     return -EINVAL;
        注册完毕,就开始kprobe的执行流程了。对于该探测点,由于其起始指令已经被修改为int3,因此在执行到该地址时,必然会触发3号中断向量的处理流程do_int3.

    点击(此处)折叠或打开

    1. /* May run on IST stack. */
    2. dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
    3. {
    4. #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
    5.     if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
    6.             == NOTIFY_STOP)
    7.         return;
    8. #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
    9. #ifdef CONFIG_KPROBES
    10.     /*在这里以DIE_INT3,通知kprobe注册的通知链*/
    11.     if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
    12.             == NOTIFY_STOP)
    13.         return;
    14. #else
    15.     if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
    16.             == NOTIFY_STOP)
    17.         return;
    18. #endif

    19.     preempt_conditional_sti(regs);
    20.     do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
    21.     preempt_conditional_cli(regs);
    22. }
        在do_int3中触发kprobe注册的通知链函数,kprobe_exceptions_notify。由于kprobe以及jprobe等机制的处理核心都在此函数内,这里只针对kprobe的流程进行分析:进入函数的原因是DIE_INT3,并且是第一次进入该函数。

    点击(此处)折叠或打开

    1. int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
    2.                  unsigned long val, void *data)
    3. {
    4.     struct die_args *args = data;
    5.     int ret = NOTIFY_DONE;

    6.     if (args->regs && user_mode_vm(args->regs))
    7.         return ret;

    8.     switch (val) {
    9.     case DIE_INT3:
    10. /*对于kprobe,进入kprobe_handler*/
    11.         if (kprobe_handler(args->regs))
    12.             ret = NOTIFY_STOP;
    13.         break;
    14.     case DIE_DEBUG:
    15.         if (post_kprobe_handler(args->regs)) {
    16.             /*
    17.              * Reset the BS bit in dr6 (pointed by args->err) to
    18.              * denote completion of processing
    19.              */
    20.             (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
    21.             ret = NOTIFY_STOP;
    22.         }
    23.         break;
    24.     case DIE_GPF:
    25.         /*
    26.          * To be potentially processing a kprobe fault and to
    27.          * trust the result from kprobe_running(), we have
    28.          * be non-preemptible.
    29.          */
    30.         if (!preemptible() && kprobe_running() &&
    31.          kprobe_fault_handler(args->regs, args->trapnr))
    32.             ret = NOTIFY_STOP;
    33.         break;
    34.     default:
    35.         break;
    36.     }
    37.     return ret;
    38. }

    点击(此处)折叠或打开

    1. static int __kprobes kprobe_handler(struct pt_regs *regs)
    2. {
    3.     kprobe_opcode_t *addr;
    4.     struct kprobe *p;
    5.     struct kprobe_ctlblk *kcb;

    6.     /*对于int 3中断,其被Intel定义为Trap,那么异常发生时EIP寄存器内指向的为异常指令的后一条指令*/
    7.     addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
    8.     /*
    9.      * We don't want to be preempted for the entire
    10.      * duration of kprobe processing. We conditionally
    11.      * re-enable preemption at the end of this function,
    12.      * and also in reenter_kprobe() and setup_singlestep().
    13.      */
    14.     preempt_disable();

    15.     kcb = get_kprobe_ctlblk();
    16.     /*获取addr对应的kprobe*/
    17.     p = get_kprobe(addr);

    18.     if (p) {
    19. /*如果异常的进入是由kprobe导致,则进入reenter_kprobe(jprobe需要,到时候分析)*/
    20.         if (kprobe_running()) {
    21.             if (reenter_kprobe(p, regs, kcb))
    22.                 return 1;
    23.         } else {
    24.             set_current_kprobe(p, regs, kcb);
    25.             kcb->kprobe_status = KPROBE_HIT_ACTIVE;

    26.             /*
    27.              * If we have no pre-handler or it returned 0, we
    28.              * continue with normal processing. If we have a
    29.              * pre-handler and it returned non-zero, it prepped
    30.              * for calling the break_handler below on re-entry
    31.              * for jprobe processing, so get out doing nothing
    32.              * more here.
    33.              */
    34.     /*执行在此地址上挂载的pre_handler函数*/
    35.             if (!p->pre_handler || !p->pre_handler(p, regs))
    36. /*设置单步调试模式,为post_handler函数的执行做准备*/
    37.                 setup_singlestep(p, regs, kcb, 0);
    38.             return 1;
    39.         }
    40.     } else if (*addr != BREAKPOINT_INSTRUCTION) {
    41.         /*
    42.          * The breakpoint instruction was removed right
    43.          * after we hit it. Another cpu has removed
    44.          * either a probepoint or a debugger breakpoint
    45.          * at this address. In either case, no further
    46.          * handling of this interrupt is appropriate.
    47.          * Back up over the (now missing) int3 and run
    48.          * the original instruction.
    49.          */
    50.         regs->ip = (unsigned long)addr;
    51.         preempt_enable_no_resched();
    52.         return 1;
    53.     } else if (kprobe_running()) {
    54.         p = __this_cpu_read(current_kprobe);
    55.         if (p->break_handler && p->break_handler(p, regs)) {
    56.             setup_singlestep(p, regs, kcb, 0);
    57.             return 1;
    58.         }
    59.     } /* else: not a kprobe fault; let the kernel handle it */

    60.     preempt_enable_no_resched();
    61.     return 0;
    62. }

    点击(此处)折叠或打开

    1. static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
    2.                  struct kprobe_ctlblk *kcb, int reenter)
    3. {
    4.     if (setup_detour_execution(p, regs, reenter))
    5.         return;

    6. #if !defined(CONFIG_PREEMPT)
    7.     if (p->ainsn.boostable == 1 && !p->post_handler) {
    8.         /* Boost up -- we can execute copied instructions directly */
    9.         if (!reenter)
    10.             reset_current_kprobe();
    11.         /*
    12.          * Reentering boosted probe doesn't reset current_kprobe,
    13.          * nor set current_kprobe, because it doesn't use single
    14.          * stepping.
    15.          */
    16.         regs->ip = (unsigned long)p->ainsn.insn;
    17.         preempt_enable_no_resched();
    18.         return;
    19.     }
    20. #endif
    21.     /*jprobe*/
    22.     if (reenter) {
    23.         save_previous_kprobe(kcb);
    24.         set_current_kprobe(p, regs, kcb);
    25.         kcb->kprobe_status = KPROBE_REENTER;
    26.     } else
    27.         kcb->kprobe_status = KPROBE_HIT_SS;
    28.     /* Prepare real single stepping */
    29.     /*准备单步模式,设置EFLAGS的TF标志位,清除IF标志位(禁止中断)*/
    30.     clear_btf();
    31.     regs->flags |= X86_EFLAGS_TF;
    32.     regs->flags &= ~X86_EFLAGS_IF;
    33.     /* single step inline if the instruction is an int3 */
    34.     if (p->opcode == BREAKPOINT_INSTRUCTION)
    35.         regs->ip = (unsigned long)p->addr;
    36.     else
    37. /*设置异常返回的指令为保存的被探测点的指令*/
    38.         regs->ip = (unsigned long)p->ainsn.insn;
    39. }
         对于kprobe,pre_handler的执行就结束了,按照代码,程序开始执行保存的被探测点的指令,由于开启了单步调试模式,执行完指令后会继续触发异常,这次的是do_debug异常处理流程。

    点击(此处)折叠或打开

    1. dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
    2. {
    3.     ....

    4.     /*在do_debug中,以DIE_DEBUG再一次触发kprobe的通知链*/
    5.     if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
    6.                             SIGTRAP) == NOTIFY_STOP)
    7.         return;
    8.    
    9.     ....
    10.     return;
    11. }

    点击(此处)折叠或打开

    1. /*对于kprobe_exceptions_notify,其DIE_DEBUG处理流程*/
    2. case DIE_DEBUG:
    3.         if (post_kprobe_handler(args->regs)) {
    4.             /*
    5.              * Reset the BS bit in dr6 (pointed by args->err) to
    6.              * denote completion of processing
    7.              */
    8.             (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
    9.             ret = NOTIFY_STOP;
    10.         }
    11.         break;

    12. static int __kprobes post_kprobe_handler(struct pt_regs *regs)
    13. {
    14.     struct kprobe *cur = kprobe_running();
    15.     struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

    16.     if (!cur)
    17.         return 0;

    18.     /*设置异常返回的EIP为下一条需要执行的指令*/
    19.     resume_execution(cur, regs, kcb);
    20.     /*恢复异常执行前的EFLAGS*/
    21.     regs->flags |= kcb->kprobe_saved_flags;

    22.     /*执行post_handler函数*/
    23.     if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
    24.         kcb->kprobe_status = KPROBE_HIT_SSDONE;
    25.         cur->post_handler(cur, regs, 0);
    26.     }

    27.     /* Restore back the original saved kprobes variables and continue. */
    28.     if (kcb->kprobe_status == KPROBE_REENTER) {
    29.         restore_previous_kprobe(kcb);
    30.         goto out;
    31.     }
    32.     reset_current_kprobe();
    33. out:
    34.     preempt_enable_no_resched();

    35.     /*
    36.      * if somebody else is singlestepping across a probe point, flags
    37.      * will have TF set, in which case, continue the remaining processing
    38.      * of do_debug, as if this is not a probe hit.
    39.      */
    40.     if (regs->flags & X86_EFLAGS_TF)
    41.         return 0;

    42.     return 1;
    43. }
        至此,一个典型的kprobe的流程已经执行完毕了。

    <script>window._bd_share_config={"common":{"bdSnsKey":{},"bdText":"","bdMini":"2","bdMiniList":false,"bdPic":"","bdStyle":"0","bdSize":"16"},"share":{}};with(document)0[(getElementsByTagName('head')[0]||body).appendChild(createElement('script')).src='http://bdimg.share.baidu.com/static/api/js/share.js?v=89860593.js?cdnversion='+~(-new Date()/36e5)];</script>
    阅读(70) | 评论(0) | 转发(0) |
    给主人留下些什么吧!~~
    评论热议
  • 相关阅读:
    实验一、DOS使用命令实验
    实验三、进程调度模拟程序
    实验四、存储管理
    实验二、作业调度模拟程序
    简单的DOS命令
    结构化方法和面向对象方法的比较
    jstree 取消选中父节点
    T4 模板代码生成
    基于Open XML 导出数据到Excel
    菜单(列存储转为行存储)
  • 原文地址:https://www.cnblogs.com/ztguang/p/12649186.html
Copyright © 2011-2022 走看看