zoukankan      html  css  js  c++  java
  • linux 捕获信号处理中遇到的死锁

    tag: 信号 signal  sigchld  死锁 堆栈

    我们的程序需要捕获信号自己处理,所以尝试对1-32的信号处理(后面33-64的信号不处理)。
    但是在调试代码时,发现一个线程死锁的问题。
    程序目的:捕获信号,然后打印堆栈。

    伪代码如下:
    设置捕获信号函数()
    {
     //设置信号处理函数
     sigact.sa_sigaction = TsSigHandler;
       
     //
     //这里捕获了很多信号,包括SIGCHLD:子进程结束,父进程会收到该信号
     sigaction( SIGSEGV, &sigact, NULL );
     ....
     sigaction( SIGCHLD, &sigact, NULL );
       
    }
      
    信号处理函数:TsSigHandler
    {
     //调用打印堆栈函数
     PrintStack();
    }
      
    打印堆栈函数PrintStack
    {
     //打印堆栈
     backtrace();
     backtrace_symbols();
       
     //调用system函数执行一些命令
     system("xxxxxx");
    }


    Thread 12 (Thread 0xf7dd2b90 (LWP 5770)):

    以下是一个让我觉得奇怪的堆栈,奇怪之处有两点:
    1.线程死锁了:最终停在 __lll_lock_wait_private 上
    2.堆栈中出现了两层 <signal handler called>,也就是信号处理函数被嵌套调用了。为什么信号不是一个接一个地依次处理?

    堆栈如下:

    #0  0xffffe410 in __kernel_vsyscall ()
    #1  0x002a0783 in __lll_lock_wait_private () from /lib/libc.so.6
    #2  0x001f8448 in _L_lock_124 () from /lib/libc.so.6
    #3  0x001f7f8b in do_system () from /lib/libc.so.6
    #4  0x001f8412 in system () from /lib/libc.so.6
    #5  0x00317ead in system () from /lib/libpthread.so.0
    #6  0x080f95c1 in PrintStack() ()
    #7  0x080f9844 in TsSigHandler(int, siginfo*, void*) ()
    #8  <signal handler called>
    #9  0xffffe410 in __kernel_vsyscall ()
    #10 0x001eb1a9 in sigprocmask () from /lib/libc.so.6
    #11 0x001f8132 in do_system () from /lib/libc.so.6
    #12 0x001f8412 in system () from /lib/libc.so.6
    #13 0x00317ead in system () from /lib/libpthread.so.0
    #14 0x080f95c1 in PrintStack() ()
    #15 0x080f9844 in TsSigHandler(int, siginfo*, void*) ()
    #16 <signal handler called>
    #17 0x002338ec in memcpy () from /lib/libc.so.6
    #18 0x0804fa02 in boom ()
    #19 0x080dbd9c in RunCmd ()
    #20 0x080dbf12 in CmdParse ()
    #21 0x080dc705 in OspTeleDaemon ()
    #22 0x080f8817 in OspTaskTemplateFunc(void*) ()
    #23 0x0030f832 in start_thread () from /lib/libpthread.so.0
    #24 0x00293e0e in clone () from /lib/libc.so.6

    #18 0x0804fa02 in boom ()
     boom()是我写的一个制造崩溃的函数:
     
     char *pBoom = NULL;
     memcpy( pBoom, "aaaa", 100 );

    #16 <signal handler called>
    触发信号

    #15 0x080f9844 in TsSigHandler(int, siginfo*, void*) ()
     TsSigHandler是信号处理函数。通过以下代码设置:
     struct sigaction sigact;
     sigemptyset( &sigact.sa_mask );
     sigact.sa_flags = SA_ONESHOT | SA_SIGINFO;
     sigact.sa_sigaction = TsSigHandler;
     信号触发后,由TsSigHandler函数处理
     
    #14 0x080f95c1 in PrintStack() ()
     TsSigHandler函数中调用PrintStack函数打印堆栈。

    #13 0x00317ead in system () from /lib/libpthread.so.0
     PrintStack函数中调用了system函数做一些额外的事情,例如执行gcore(事实证明,这种方法是有点问题的)。

    #11 0x001f8132 in do_system () from /lib/libc.so.6
     system调用了do_system
     
    #10 0x001eb1a9 in sigprocmask () from /lib/libc.so.6
     do_system调用sigprocmask

    #8  <signal handler called>
     关键来了:这是获取到了另外一个信号:SIGCHLD。

    #7  0x080f9844 in TsSigHandler(int, siginfo*, void*) ()
     又调用信号处理函数TsSigHandler
    #3  0x001f7f8b in do_system () from /lib/libc.so.6
     system调用do_system,调用流程和上面当然是一样的
     
    #2  0x001f8448 in _L_lock_124 () from /lib/libc.so.6
    #1  0x002a0783 in __lll_lock_wait_private () from /lib/libc.so.6
     nice!锁住了。。
     

    分析:

    先看 glibc 中 do_system() 的实现(system() 内部调用的就是它):

    /* Execute LINE as a shell command, returning its status.

       The command is run by forking a child that execs SHELL_PATH with the
       arguments SHELL_NAME, "-c", LINE; the parent waits for that child
       with waitpid.  While the command runs, SIGINT and SIGQUIT are set to
       SIG_IGN and SIGCHLD is added to the caller's blocked signals; the
       saved dispositions and the saved mask are put back before returning.

       Returns the status stored by waitpid.  Returns -1 if changing the
       SIGINT/SIGQUIT dispositions fails, if blocking SIGCHLD fails, if the
       fork fails, if waitpid does not return the child's pid, or if
       restoring the signal state fails.

       DO_LOCK/DO_UNLOCK, ADD_REF/SUB_REF, INIT_LOCK, FORK, SHELL_PATH,
       SHELL_NAME and CLEANUP_HANDLER/CLEANUP_RESET are defined outside this
       excerpt.  NOTE(review): the lock presumably serializes the reference
       count and the shared INTR/QUIT save slots between concurrent callers
       -- confirm against the macro definitions.  */
    static int
    do_system (const char *line)
    {
      int status, save;
      pid_t pid;
      struct sigaction sa;
      /* INTR and QUIT hold the SIGINT/SIGQUIT dispositions that were in
         effect before this call.  They are locals only in this branch.
         NOTE(review): with _LIBC_REENTRANT they must come from outside this
         excerpt -- confirm.  */
    #ifndef _LIBC_REENTRANT
      struct sigaction intr, quit;
    #endif
      /* Signal mask in effect before SIGCHLD is blocked below.  */
      sigset_t omask;
     
      /* Disposition installed for SIGINT and SIGQUIT while the command
         runs: ignore, no flags, empty mask.  */
      sa.sa_handler = SIG_IGN;
      sa.sa_flags = 0;
      __sigemptyset (&sa.sa_mask);
     
      /* The dispositions are saved and replaced only when ADD_REF ()
         yields 0.  NOTE(review): presumably that means no other call is
         currently active -- confirm.  */
      DO_LOCK ();
      if (ADD_REF () == 0)
        {
          if (__sigaction (SIGINT, &sa, &intr) < 0)
        {
          /* Nothing was changed yet: drop the reference and leave through
             the shared unlock-and-return path.  */
          (void) SUB_REF ();
          goto out;
        }
          if (__sigaction (SIGQUIT, &sa, &quit) < 0)
        {
          /* SIGINT was already replaced, so it has to be put back.  errno
             is saved first because the cleanup path re-sets it from
             SAVE.  */
          save = errno;
          (void) SUB_REF ();
          goto out_restore_sigint;
        }
        }
      DO_UNLOCK ();
     
      /* We reuse the bitmap in the 'sa' structure.  */
      __sigaddset (&sa.sa_mask, SIGCHLD);
      /* Remember errno so the ENOSYS branch below can restore it.  */
      save = errno;
      /* Block SIGCHLD and keep the previous mask in OMASK.  */
      if (__sigprocmask (SIG_BLOCK, &sa.sa_mask, &omask) < 0)
        {
    #ifndef _LIBC
          /* Outside libc, ENOSYS from sigprocmask is not treated as a
             failure: errno is restored and execution continues.  */
          if (errno == ENOSYS)
        __set_errno (save);
          else
    #endif
        {
          /* Failure path: undo the disposition changes if SUB_REF ()
             yields 0.  The labels out_restore_sigint and out are the
             targets of the gotos in the setup code above, which jump here
             with the lock already held, so every failure path ends in the
             same DO_UNLOCK () and return -1.  */
          DO_LOCK ();
          if (SUB_REF () == 0)
            {
              save = errno;
              (void) __sigaction (SIGQUIT, &quit, (struct sigaction *) NULL);
            out_restore_sigint:
              (void) __sigaction (SIGINT, &intr, (struct sigaction *) NULL);
              __set_errno (save);
            }
        out:
          DO_UNLOCK ();
          return -1;
        }
        }
     
      /* Optional hook, defined outside this excerpt.  */
    #ifdef CLEANUP_HANDLER
      CLEANUP_HANDLER;
    #endif
     
      /* Use the FORK macro when one is provided, otherwise __fork.  */
    #ifdef FORK
      pid = FORK ();
    #else
      pid = __fork ();
    #endif
      if (pid == (pid_t) 0)
        {
          /* Child side.  */
          /* Argument vector passed to __execve below.  */
          const char *new_argv[4];
          new_argv[0] = SHELL_NAME;
          new_argv[1] = "-c";
          new_argv[2] = line;
          new_argv[3] = NULL;
     
          /* Restore the signals.  */
          (void) __sigaction (SIGINT, &intr, (struct sigaction *) NULL);
          (void) __sigaction (SIGQUIT, &quit, (struct sigaction *) NULL);
          (void) __sigprocmask (SIG_SETMASK, &omask, (sigset_t *) NULL);
          /* NOTE(review): presumably resets the lock state inherited from
             the parent -- INIT_LOCK is defined outside this excerpt.  */
          INIT_LOCK ();
     
          /* Exec the shell.  */
          (void) __execve (SHELL_PATH, (char *const *) new_argv, __environ);
          /* Reached only if __execve returned, i.e. the exec failed.  */
          _exit (127);
        }
      else if (pid < (pid_t) 0)
        /* The fork failed.  */
        status = -1;
      else
        /* Parent side.  */
        {
          /* Note the system() is a cancellation point.  But since we call
         waitpid() which itself is a cancellation point we do not
         have to do anything here.  */
          /* Wait for the child; any result other than its pid is reported
             as -1.  */
          if (TEMP_FAILURE_RETRY (__waitpid (pid, &status, 0)) != pid)
        status = -1;
        }
     
      /* Counterpart of the CLEANUP_HANDLER hook above.  */
    #ifdef CLEANUP_HANDLER
      CLEANUP_RESET;
    #endif
     
      /* Remember errno so the ENOSYS branch below can restore it.  */
      save = errno;
      /* Restore the signal state.  The lock is taken first and released
         only at the end, so the __sigprocmask call that reinstates OMASK
         runs with the lock held.  The handlers are restored only when
         SUB_REF () yields 0; the bitwise | makes both __sigaction calls
         run, and if either fails the || short-circuits so the mask is not
         restored in that case.
         NOTE(review): if reinstating OMASK unblocks a pending SIGCHLD whose
         handler calls system() again, that nested call reaches the
         DO_LOCK () at the top of this function while this frame still
         holds the lock -- confirm whether the lock tolerates that.  */
      DO_LOCK ();
      if ((SUB_REF () == 0
           && (__sigaction (SIGINT, &intr, (struct sigaction *) NULL)
           | __sigaction (SIGQUIT, &quit, (struct sigaction *) NULL)) != 0)
          || __sigprocmask (SIG_SETMASK, &omask, (sigset_t *) NULL) != 0)
        {
    #ifndef _LIBC
          /* glibc cannot be used on systems without waitpid.  */
          if (errno == ENOSYS)
        __set_errno (save);
          else
    #endif
        status = -1;
        }
      DO_UNLOCK ();
     
      return status;
    }


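    system()函数执行的大体过程是:fork()->exec()->waitpid(),
    waitpid用于等待子进程执行完毕。
    子进程执行完毕时,内核会向父进程发送SIGCHLD信号。
    do_system在fork之前用sigprocmask(SIG_BLOCK)阻塞了SIGCHLD,所以这个信号先处于未决(pending)状态;
    waitpid返回之后,do_system先执行DO_LOCK()加锁,再用sigprocmask(SIG_SETMASK, &omask)恢复原来的信号掩码,
    未决的SIGCHLD就在这一刻被递送(对应堆栈#10的sigprocmask),而此时锁还没有释放。
    由于我们的程序也捕获了SIGCHLD,并且sa_mask为空(处理一个信号期间不会屏蔽其他信号),
    TsSigHandler就被嵌套调用了,这就是堆栈中看到了2个信号的原因。
    第二次进入的TsSigHandler又调用system(),do_system开头的DO_LOCK()要获取的正是本线程已经持有的那把锁,
    从堆栈看这把锁不可重入,于是线程永远等在__lll_lock_wait_private上,形成死锁。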

    解决方法:
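    1.不要捕获SIGCHLD信号:保持它的默认处理(SIG_DFL,默认动作就是忽略)即可,除非你的程序需要对子进程退出做非常特殊的处理。
      注意不要把它显式设置为SIG_IGN:那样子进程会被内核自动回收,system()内部的waitpid会失败,system()返回-1。
    2.不要在信号处理函数里调用system():system()不是异步信号安全(async-signal-safe)的函数,在信号处理函数中调用它本身就不安全;
      backtrace_symbols()内部会调用malloc,同样不适合在信号处理函数中使用(可以改用backtrace_symbols_fd())。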

    to do: 有空了记得补详细些


     

  • 相关阅读:
    关于向量叉积求得法向量方向判断
    Winform菜单之ContextMenuStrip
    Winform菜单之Menustrip
    MDI窗体及涉及到的相关问题
    Winform主窗体的设置
    Winform登录、控制软件只运行一次、回车登录
    MessageBox详解
    Winform窗体
    Winform创建解决方案
    Winform开发入门集中培训系列文章
  • 原文地址:https://www.cnblogs.com/solohac/p/4154183.html
Copyright © 2011-2022 走看看