zoukankan      html  css  js  c++  java
  • apache分析之二、post-commit后台运行

    一、问题描述
    希望在svn的post-commit中执行一个后台任务,但是发现在该后台任务退出之前,svn提交始终不会返回。按照bash对后台任务的定义,子进程派生之后,父进程不会把终端输入(终端的前台任务)派发给后台任务,也不会同步等待该子进程的返回。在父shell(即post-commit)退出之后,内核会把它的子进程的父进程重新设置为系统的始祖进程,也就是1号任务。此时svn并不是该后台任务的父进程,所以它无法通过wait同步等待后台任务的退出,因此它一定使用了其它的同步机制。
    二、内核中对于进程退出的处理
    do_exit--->>exit_notify
    static void
    forget_original_parent(struct task_struct *father, struct list_head *to_release)
    {
        struct task_struct *p, *reaper = father;
        struct list_head *_p, *_n;

        do {
            reaper = next_thread(reaper);
            if (reaper == father) {
                reaper = child_reaper(father);
                break;
            }
        } while (reaper->exit_state);
    static inline struct task_struct *child_reaper(struct task_struct *tsk)
    {
        return init_pid_ns.child_reaper;
    }
    这里的意思大致是这样的:如果退出的是一个多线程进程中的某个线程,那么就把自己的子进程托付给同一线程组中另一个尚未退出的线程,也就是自己的兄弟线程。可惜的是,我们通常使用的工具一般都是单线程的,一个进程里只有一个线程,不存在兄弟线程。此时就要执行第二步,或者叫做plan B:把自己的子进程重新托付给系统的初始化进程,也就是1号进程。
    三、做一个验证
    在post-commit中添加一个
    sleep 1234 &
    这样通过ps可以方便的找到这个任务
    [root@Harry hooks]# ps aux | grep sleep
    daemon   19691  0.0  0.0   1892   400 pts/0    S    13:20   0:00 sleep 1234
    root     22089  0.0  0.0   4220   696 pts/1    S+   13:34   0:00 grep sleep
    [root@Harry hooks]# cat /proc/19691/status
    Name:    sleep
    State:    S (sleeping)
    Tgid:    19691
    Pid:    19691
    PPid:    1
    TracerPid:    0
    Uid:    2    2    2    2
    Gid:    2    2    2    2
    Utrace:    0
    FDSize:    256
    Groups:    1 2 4 7 
    可以看到,此时sleep的父进程的确已经变成了1号进程,也就是系统的始祖进程。
    [root@Harry hooks]# ps aux | grep post-commit
    daemon   19690  0.0  0.0      0     0 pts/0    Z    13:20   0:00 [post-commit] <defunct>
    root     24274  0.0  0.0   4220   696 pts/1    S+   13:47   0:00 grep post-commit
    此时它的父进程post-commit已经退出,成为僵尸进程。
    四、此时apache的状态
    同样用gdb附加到apache进程上,依次查看各个线程的调用栈:
    (gdb) bt
    #0  0x00cad424 in __kernel_vsyscall ()
    #1  0x002e78f6 in epoll_wait () from /lib/libc.so.6
    #2  0x009b8bd9 in impl_pollset_poll (pollset=0x99bce70, timeout=100, 
        num=0xa71e6284, descriptors=0xa71e6288) at poll/unix/epoll.c:256
    #3  0x009ba574 in apr_pollset_poll (pollset=0x99bce70, timeout=100000, 
        num=0xa71e6284, descriptors=0xa71e6288) at poll/unix/pollset.c:343
    #4  0x080aca7c in listener_thread (thd=0x99bd590, dummy=0xb6c014c0)
        at event.c:1392
    #5  0x009bfe0d in dummy_worker (opaque=0x99bd590) at threadproc/unix/thread.c:142
    #6  0x00391925 in start_thread () from /lib/libpthread.so.0
    #7  0x002e707e in clone () from /lib/libc.so.6
    (gdb) info thread
      18 Thread 0xb11f6b70 (LWP 6221)  0x00cad424 in __kernel_vsyscall ()
    * 2 Thread 0xa71e6b70 (LWP 6237)  0x00cad424 in __kernel_vsyscall ()
      1 Thread 0xb77bd9c0 (LWP 6210)  0x00cad424 in __kernel_vsyscall ()
    (gdb) thread 1
    [Switching to thread 1 (Thread 0xb77bd9c0 (LWP 6210))]#0  0x00cad424 in __kernel_vsyscall ()
    (gdb) bt
    #0  0x00cad424 in __kernel_vsyscall ()
    #1  0x002df451 in select () from /lib/libc.so.6
    #2  0x009c0baf in apr_sleep (t=500000) at time/unix/time.c:246
    #3  0x080ae2fb in join_workers (listener=0x99bd590, threads=0x9a954c0)
        at event.c:1959
    #4  0x080ae895 in child_main (child_num_arg=0) at event.c:2109
    #5  0x080ae9b8 in make_child (s=0x997cea8, slot=0) at event.c:2169
    #6  0x080aeb02 in startup_children (number_to_start=3) at event.c:2233
    #7  0x080af475 in event_run (_pconf=0x99580a8, plog=0x99b8aa0, s=0x997cea8)
        at event.c:2561
    #8  0x0806ef8f in ap_run_mpm (pconf=0x99580a8, plog=0x99b8aa0, s=0x997cea8)
        at mpm_common.c:98
    #9  0x08068eef in main (argc=2, argv=0xbfab1d24) at main.c:777
    (gdb) thread 18
    [Switching to thread 18 (Thread 0xb11f6b70 (LWP 6221))]#0  0x00cad424 in __kernel_vsyscall ()
    (gdb) bt
    #0  0x00cad424 in __kernel_vsyscall ()
    #1  0x00398eeb in read () from /lib/libpthread.so.0
    #2  0x009ad41e in apr_file_read (thefile=0x9acff20, buf=0xb6d50020, 
        nbytes=0xb11f5e18) at file_io/unix/readwrite.c:116
    #3  0x0043da48 in svn_io_file_read (file=0x9acff20, buf=0xb6d50020, 
        nbytes=0xb11f5e18, pool=0x9aca898) at subversion/libsvn_subr/io.c:3132
    #4  0x0043b324 in stringbuf_from_aprfile (result=0xb11f5e9c, filename=0x0, 
        file=0x9acff20, check_size=1, pool=0x9aca898)
        at subversion/libsvn_subr/io.c:2049
    #5  0x0043b633 in svn_stringbuf_from_aprfile (result=0xb11f5e9c, file=0x9acff20, 
        pool=0x9aca898) at subversion/libsvn_subr/io.c:2106
    #6  0x00bf72a9 in check_hook_result (name=0xc10126 "post-commit", 
        cmd=0x9acfd60 "/svnrepo/tsecer/hooks/post-commit", cmd_proc=0xb11f5f10, 
        read_errhandle=0x9acff20, pool=0x9aca898)
        at subversion/libsvn_repos/hooks.c:71
    #7  0x00bf77ef in run_hook_cmd (result=0x0, name=0xc10126 "post-commit", 
        cmd=0x9acfd60 "/svnrepo/tsecer/hooks/post-commit", args=0xb11f5f64, 
        stdin_handle=0x0, pool=0x9aca898) at subversion/libsvn_repos/hooks.c:211
    #8  0x00bf81df in svn_repos__hooks_post_commit (repos=0xb6c032c8, rev=1, 
        pool=0x9aca898) at subversion/libsvn_repos/hooks.c:469
    #9  0x00bf577f in svn_repos_fs_commit_txn (conflict_p=0xb11f6000, 
        repos=0xb6c032c8, new_rev=0xb11f5ffc, txn=0x9acc520, pool=0x9aca898)
        at subversion/libsvn_repos/fs-wrap.c:64
    ---Type <return> to continue, or q <return> to quit---
    #10 0x003c899e in merge (target=0x9aca338, source=0x9aa4948, no_auto_merge=1, 
        no_checkout=1, prop_elem=0x9aca0a0, output=0x9acb470)
        at subversion/mod_dav_svn/version.c:1426
    #11 0x00175f82 in dav_method_merge (r=0x9aca8d8) at mod_dav.c:4399
    #12 0x00176a36 in dav_handler (r=0x9aca8d8) at mod_dav.c:4778
    #13 0x0808aeda in ap_run_handler (r=0x9aca8d8) at config.c:169
    #14 0x0808b5f2 in ap_invoke_handler (r=0x9aca8d8) at config.c:432
    #15 0x080a2c68 in ap_process_async_request (r=0x9aca8d8) at http_request.c:317
    #16 0x0809f3cf in ap_process_http_async_connection (c=0xb6c01c00)
        at http_core.c:143
    #17 0x0809f5a2 in ap_process_http_connection (c=0xb6c01c00) at http_core.c:228
    #18 0x08095ee3 in ap_run_process_connection (c=0xb6c01c00) at connection.c:41
    #19 0x080ab9ef in process_socket (thd=0x99bd390, p=0xb6c019c8, sock=0xb6c01a10, 
        cs=0xb6c01bb8, my_child_num=0, my_thread_num=9) at event.c:917
    #20 0x080adb14 in worker_thread (thd=0x99bd390, dummy=0xb6c00a40) at event.c:1744
    #21 0x009bfe0d in dummy_worker (opaque=0x99bd390) at threadproc/unix/thread.c:142
    #22 0x00391925 in start_thread () from /lib/libpthread.so.0
    #23 0x002e707e in clone () from /lib/libc.so.6
    (gdb) frame 2
    #2  0x009ad41e in apr_file_read (thefile=0x9acff20, buf=0xb6d50020, 
        nbytes=0xb11f5e18) at file_io/unix/readwrite.c:116
    116                rv = read(thefile->filedes, buf, *nbytes);
    (gdb) p *thefile
    $2 = {pool = 0x9aca898, filedes = 12, fname = 0x0, flags = 0, eof_hit = 0, 
      is_pipe = 1, timeout = -1, buffered = 0, blocking = BLK_ON, ungetchar = -1, 
      buffer = 0x0, bufpos = 0, bufsize = 0, dataRead = 0, direction = 0, 
      filePtr = 0, thlock = 0x0}
    (gdb) shell ls /proc/6210/fd/12
    /proc/6210/fd/12
    (gdb) shell ls /proc/6210/fd/12 -l
    lr-x------. 1 root root 64 2012-10-14 13:34 /proc/6210/fd/12 -> pipe:[4017708]
    (gdb) 
    [root@Harry hooks]# ll  /proc/19691/fd
    total 0
    lr-x------. 1 daemon daemon 64 2012-10-14 13:35 0 -> /dev/null
    l-wx------. 1 daemon daemon 64 2012-10-14 13:35 1 -> /dev/null
    l-wx------. 1 daemon daemon 64 2012-10-14 13:20 2 -> pipe:[4017708]
    可以看到,apache进程(作为hook的父进程)的12号文件描述符和sleep进程的标准错误指向同一个管道pipe:[4017708],apache正阻塞在对该管道的read上,等待子进程的标准错误结束。如果子进程的标准错误一直没有关闭,那么父进程将会一直等待。
    五、内核中关于文件的继承和关闭
    当派生一个新的进程时,子进程会复制父进程的文件描述符表,并增加其中每个已打开文件的引用计数。
    copy_process--->>>copy_files--->>>dup_fd
        for (i = open_files; i != 0; i--) {
            struct file *f = *old_fds++;
            if (f) {
                get_file(f);
            }
    其中
    #define get_file(x)    atomic_inc(&(x)->f_count)
    只是增加了文件的引用计数。
    当进程退出关闭一个文件的时候,执行操作为
    void fastcall fput(struct file *file)
    {
        if (atomic_dec_and_test(&file->f_count))
            __fput(file);
    }
    由于sleep进程继承了pipe的写端并且一直没有关闭,它仍然持有该文件的一个引用,所以即使其它进程都已经关闭了这个文件,引用计数也不会减到零,pipe无法被释放,即使sleep的父进程已经变成了init进程也是如此。只有当管道的所有写端都关闭之后,read系统调用才会返回EOF。这也是为什么需要在svn的post-commit中将后台任务的标准错误也重定向到另外一个文件的原因,例如 sleep 1234 2>/dev/null & 。

    这一点对于svn来说,当post-commit失败的时候,它需要知道错误输出的内容,提示给svn的客户端,所以它从子进程的标准错误中读取数据也是合理的。
  • 相关阅读:
    fitnesse的安装
    elasticsearch 迁移
    网络基础之 二层三层网络通讯
    ansible 基本使用之3 playbook
    ansible-基本使用-2
    ansible 基本使用-1
    k8s 机器搭建之etcd
    http 状态码之3xx
    mysql 主从相关
    redis 主从及哨兵模式
  • 原文地址:https://www.cnblogs.com/tsecer/p/10487810.html
Copyright © 2011-2022 走看看