zoukankan      html  css  js  c++  java
  • Linux signal 那些事儿(2)【转】

    转自:http://blog.chinaunix.net/uid-24774106-id-4064447.html

    上一篇博文,基本算是给glibc的signal函数翻了个身。现在glibc的signal基本修正了传统的UNIX的一些弊端,我们说signal并没有我们想象的那么不堪。但是signal也有不尽人意的地方。比如信号处理期间,我们期望屏蔽某些信号,而不仅仅是屏蔽自身,这时候signal就不行了。信号既然是进程间通信IPC的一种机制,我们期望获取更多的信息,而不仅仅是signo,这时候signal/kill这个机制就基本不行了。
        上面所说的都是signal的一些毛病,但是这些都不是致命的,致命的问题在于老的signal机制的不可靠。信号分成可靠性信号和非可靠性信号,并不是说用sigaction安装,用sigqueue发送的信号就是可靠性信号,用signal安装,kill/tkill发送的信号就是非可靠性信号。这种理解是错误的。这在Linux环境进程间通信(二):信号(上)一文中讲得非常清楚了。
        信号值位于[SIGRTMIN,SIGRTMAX] 之间的信号,就是可靠信号,位于[SIGHUP,SIGSYS]之间信号,都是非可靠性信号,与安装函数是signal还是sigaction无关,与发送函数是kill还是sigqueue无关。

        1~31之间的所有信号都称为不可靠信号,原因就在于信号不可排队,如果kernel发现同一个信号已经有挂起信号,当前信号就会被丢弃,就好象从来没有被发送过一样,无法引起信号的传递,也无法让进程执行信号处理函数。这种实现的机理,造成了这些信号的不可靠。这正所谓:我本将心向明月,奈何明月照沟渠。
        为了解决这个问题,Linux引入了实时信号,信号值在[32~64]区间内,或者称之为可靠信号。这种信号,kernel不会ignore,哪怕已经有了好多同一个信号,kernel会把新收到信号放入queue之中,等待被传递出去。
        空口说白话,不是我们的风格,我现在用代码证明之。我参考了Linux Programming Interface 一书的例子,写了两个程序,一个是signal_receiver ,一个是signal_sender.
        先看signal_receiver的code:    

    1. manu@manu-hacks:~/code/c/self/signal$ cat signal_receiver.c
    2. #include <stdio.h>
    3. #include <stdlib.h>
    4. #include <unistd.h>
    5. #include <signal.h>
    6. #include <string.h>
    7. #include <errno.h>


    8. static int sig_cnt[NSIG];
    9. static volatile sig_atomic_t get_SIGINT = 0;

    10. void handler(int signo)
    11. {
    12.     if(signo == SIGINT)
    13.         get_SIGINT = 1;
    14.     else
    15.         sig_cnt[signo]++;
    16. }

    17. int main(int argc,char* argv[])
    18. {
    19.     int i = 0;
    20.     sigset_t blockall_mask ;
    21.     sigset_t pending_mask ;
    22.     sigset_t empty_mask ;
    23.     printf("%s:PID is %ld ",argv[0],getpid());

    24.     
    25.     for(i = 1; i < NSIG; i++)
    26.     {
    27.         if(i == SIGKILL || i == SIGSTOP)
    28.             continue;

    29.         if(signal(i,&handler) == SIG_ERR)
    30.         {
    31.             fprintf(stderr,"signal for signo(%d) failed (%s) ",i,strerror(errno));
    32. //            return -1;
    33.         }
    34.     }

    35.     if(argc > 1)
    36.     {
    37.         int sleep_time = atoi(argv[1]);
    38.         sigfillset(&blockall_mask);

    39.         if(sigprocmask(SIG_SETMASK,&blockall_mask,NULL) == -1)
    40.         {
    41.             fprintf(stderr,"setprocmask to block all signal failed(%s) ",strerror(errno));
    42.             return -2;
    43.         }

    44.         printf("I will sleep %d second ",sleep_time);

    45.         sleep(sleep_time);
    46.         if(sigpending(&pending_mask) == -1)
    47.         {
    48.             fprintf(stderr,"sigpending failed(%s) ",strerror(errno));
    49.             return -2;
    50.         }

    51.         for(i = 1 ; i < NSIG ; i++)
    52.         {
    53.             if(sigismember(&pending_mask,i))
    54.             printf("signo(%d) :%s ",i,strsignal(i));
    55.         }

    56.         sigemptyset(&empty_mask);
    57.         if(sigprocmask(SIG_SETMASK,&empty_mask,NULL) == -1)
    58.         {
    59.             fprintf(stderr,"setprocmask to release all signal failed(%s) ",strerror(errno));
    60.             return -3;
    61.         }
    62.         
    63.     }

    64.     while(!get_SIGINT)
    65.         continue ; //why not use pause ? I will explain later

    66.     for(i = 1; i < NSIG ; i++)
    67.     {
    68.         if(sig_cnt[i] != 0 )
    69.         {
    70.             printf("%s:signal %d caught %d time%s ",
    71.                     argv[0],i,sig_cnt[i],(sig_cnt[i] >1)?"s":"");
    72.         }
    73.     }

    74.     return 0;

    75. }

         因为我们知道,SIGKILL和SIGSTOP这两个信号是不能够定制自己的信号处理函数的,当然也不能block,原因很简单,OS或者说root才是final boss,必须有稳定终结进程的办法。假如所有的信号,进程都能ignore,OS如何终结进程?
        这个signal_receiver会等待所有的信号,接收到某信号后,该信号的捕捉到的次数++,SIGINT会终结进程,进程退出前,会打印信号的捕捉统计。
        如果进程有参数,表示sleep时间,signal_receiver会先屏蔽所有信号(当然,SIGKILL和SIGSTOP并不能被真正屏蔽)。然后sleep 一段时间后,取消信号屏蔽。我们可以想象,在信号屏蔽期间,我们收到的信号,都会在kernel记录下来,但是并不能delivery,这种信号称之挂起信号。如果在sleep期间或者说信号屏蔽期间,我收到SIGUSR1 这个信号1次和10000次,对内核来说,都是没差别的,因为后面的9999次都会被ignore掉。SIGUSR1属于不可靠信号,位图表示有没有挂起信号,有的话,直接ignore,没有的话,则记录在kernel。
        然后我们看下,signal_sender: 

    1. manu@manu-hacks:~/code/c/self/signal$ cat signal_sender.c
    2. #include <stdio.h>
    3. #include <stdlib.h>
    4. #include <getopt.h>
    5. #include <signal.h>
    6. #include <string.h>
    7. #include <errno.h>

    /*
     * Print the signal_sender command-line synopsis to stderr, one item per
     * line.
     */
    void usage(void)
    {
        fprintf(stderr, "USAGE:\n");
        fprintf(stderr, "--------------------------------\n");
        fprintf(stderr, "signal_sender pid signo times\n");
    }

    /*
     * signal_sender entry point: signal_sender pid signo times
     *
     * Sends signal `signo` to process `pid` with kill(), `times` times in a
     * row.
     *
     * Returns 0 when every kill() succeeded, -1 on missing or invalid
     * arguments (after printing the usage text), -2 as soon as one kill()
     * fails.
     */
    int main(int argc, char *argv[])
    {
        pid_t pid;
        int signo;
        int times;
        int i;

        if (argc < 4) {
            usage();
            return -1;
        }

        pid = (pid_t)atol(argv[1]);
        signo = atoi(argv[2]);
        times = atoi(argv[3]);

        /*
         * Accept 1..SIGRTMAX inclusive, so the highest real-time signal can be
         * sent as well. 32 and 33 stay rejected as before.
         * NOTE(review): presumably because the C library keeps those two for
         * itself - confirm.
         */
        if (pid <= 0 || times < 0 || signo < 1 || signo > SIGRTMAX ||
            signo == 32 || signo == 33) {
            usage();
            return -1;
        }

        /* pid_t has no printf conversion of its own; widen it to long for %ld. */
        printf("pid = %ld,signo = %d,times = %d\n", (long)pid, signo, times);

        for (i = 0; i < times; i++) {
            if (kill(pid, signo) == -1) {
                fprintf(stderr, "send signo(%d) to pid(%ld) failed,reason(%s)\n",
                        signo, (long)pid, strerror(errno));
                return -2;
            }
        }

        fprintf(stdout, "done\n");
        return 0;
    }

         signal_sender需要三个参数,pid signo times,就是向哪个进程发送什么信号多少次的意思。如 signal_sender 1234 10 10000,含义是向pid=1234的进程发送10号信号(SIGUSR1),连续发送10000次。
        有这两个进程,我们就可以实验了  。 

    1. manu@manu-hacks:~/code/c/self/signal$ ./signal_receiver &
    2. [1] 23416
    3. manu@manu-hacks:~/code/c/self/signal$ ./signal_receiver:PID is 23416
    4. signal for signo(32) failed (Invalid argument)
    5. signal for signo(33) failed (Invalid argument)

    6. manu@manu-hacks:~/code/c/self/signal$ ./signal_sender 23416 10 10000
    7. pid = 23416,signo = 10,times = 10000
    8. done
    9. manu@manu-hacks:~/code/c/self/signal$ sleep 20 ; ./signal_sender 23416 2 1
    10. pid = 23416,signo = 2,times = 1
    11. done
    12. ./signal_receiver:signal 10 caught 2507 times
    13. [1]+ Done ./signal_receiver

        signal_receiver等待signal的来临,signal_sender向其发送SIGUSR1 10000次,然后sleep 20秒,确保signal_receiver处理完成。但是我们发现,其实一共才caught信号SIGUSR1  2507次,7000多次的发送都丢失了,所以我们称SIGUSR1 是非可靠信号,存在丢信号的问题。
        俗话说不怕不识货,就怕货比货 ,我们让可靠信号参战,看下效果:

    1. manu@manu-hacks:~/code/c/self/signal$ ./signal_receiver &
    2. [1] 26067
    3. ./signal_receiver:PID is 26067
    4. signal for signo(32) failed (Invalid argument)
    5. signal for signo(33) failed (Invalid argument)
    6. manu@manu-hacks:~/code/c/self/signal$ ./signal_sender 26067 10 10000
    7. pid = 26067,signo = 10,times = 10000
    8. done
    9. manu@manu-hacks:~/code/c/self/signal$ ./signal_sender 26067 36 10000
    10. pid = 26067,signo = 36,times = 10000
    11. done
    12. manu@manu-hacks:~/code/c/self/signal$ ./signal_sender 26067 2 1
    13. pid = 26067,signo = 2,times = 1
    14. done
    15. ./signal_receiver:signal 10 caught 2879 times
    16. ./signal_receiver:signal 36 caught 10000 times
    17. [1]+ Done ./signal_receiver

        可靠性信号36,发送10000次,signal_receiver全部收到,不可靠性信号10,共收到2879次。这个数字是不可预期的,取决于内核进程的调度。
        这个如果还不够直观,我们再比较一次,让signal_receiver先屏蔽所有信号一段时间,如30s,然后解除屏蔽。

    1. manu@manu-hacks:~/code/c/self/signal$ ./signal_receiver 30 &
    2. [1] 27639
    3. manu@manu-hacks:~/code/c/self/signal$ ./signal_receiver:PID is 27639
    4. signal for signo(32) failed (Invalid argument)
    5. signal for signo(33) failed (Invalid argument)
    6. I will sleep 30 second

    7. manu@manu-hacks:~/code/c/self/signal$ ./signal_sender 27639 10 10000
    8. pid = 27639,signo = 10,times = 10000
    9. done
    10. manu@manu-hacks:~/code/c/self/signal$ ./signal_sender 27639 36 10000
    11. pid = 27639,signo = 36,times = 10000
    12. done
    13. manu@manu-hacks:~/code/c/self/signal$
    14. manu@manu-hacks:~/code/c/self/signal$ signo(10) :User defined signal 1
    15. signo(36) :Real-time signal 2

    16. manu@manu-hacks:~/code/c/self/signal$ ./signal_sender 27639 2 1
    17. pid = 27639,signo = 2,times = 1
    18. done
    19. ./signal_receiver:signal 10 caught 1 time
    20. ./signal_receiver:signal 36 caught 10000 times
    21. [1]+ Done ./signal_receiver 30

          这个比较反差比较大,不可靠signal10 共收到1次,可靠性信号36 共caught到10000次。原因就在于sigprocmask将所有的信号都屏蔽了,造成所有的信号都不能delivery。对1~31的信号,内核发现已经有相应的挂起信号,则ignore掉新来的信号。但是可靠性信号则不同,会添加到队列中去,尽管已经有了相同的信号。需要注意的是,signal pending有上限,并不能无限制的发:

    1. manu@manu-hacks:~/code/c/self/signal$ ulimit -a
    2. core file size (blocks, -c) 0
    3. data seg size (kbytes, -d) unlimited
    4. scheduling priority (-e) 0
    5. file size (blocks, -f) unlimited
    6. pending signals (-i) 15408
    7. max locked memory (kbytes, -l) 64
    8. max memory size (kbytes, -m) unlimited
    9. open files (-n) 1024
    10. pipe size (512 bytes, -p) 8
    11. POSIX message queues (bytes, -q) 819200
    12. real-time priority (-r) 0
    13. stack size (kbytes, -s) 8192
    14. cpu time (seconds, -t) unlimited
    15. max user processes (-u) 15408
    16. virtual memory (kbytes, -v) unlimited
    17. file locks (-x) unlimited

        我发送100万,最终会收到15408个可靠信号:

    1. manu@manu-hacks:~/code/c/self/signal$ ./signal_receiver 30 &
    2. [1] 16488
    3. manu@manu-hacks:~/code/c/self/signal$ ./signal_receiver:PID is 16488
    4. signal for signo(32) failed (Invalid argument)
    5. signal for signo(33) failed (Invalid argument)
    6. I will sleep 30 second

    7. manu@manu-hacks:~/code/c/self/signal$ ./signal_sender 16488 36 1000000
    8. pid = 16488,signo = 36,times = 1000000
    9. done
    10. manu@manu-hacks:~/code/c/self/signal$ signo(36) :Real-time signal 2

    11. manu@manu-hacks:~/code/c/self/signal$ ./signal_sender 16488 2 1
    12. pid = 16488,signo = 2,times = 1
    13. done
    14. ./signal_receiver:signal 36 caught 15408 times
    15. [1]+ Done ./signal_receiver 30

          内核是怎么做到的?
        
        上图是内核中signal相关的数据结构。其中task_struct中有sigpending类型的成员变量pending
        

    /*
     * Abridged excerpt of struct task_struct; the "..." lines stand for
     * members left out of the listing. Only the signal-related members are
     * shown in full.
     */
    1. struct task_struct {
    2.     volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
    3.     void *stack;
    4.     atomic_t usage;
    5.     unsigned int flags; /* per process flags, defined below */
    6.     unsigned int ptrace;
    7.         ...
    8.         ...
    9. /* signal handlers */
    10.     struct signal_struct *signal;
    11.     struct sighand_struct *sighand;

    12.     sigset_t blocked, real_blocked;
    13.     sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
    /* pending-signal record embedded directly in the task (a struct sigpending) */
    14.     struct sigpending pending;

    15.        ...
    16. }

    /*
     * Abridged excerpt of struct signal_struct; the "..." lines stand for
     * members left out of the listing. It is reached from a task through the
     * task_struct member `signal`.
     */
    17. struct signal_struct {
    18.       atomic_t     sigcnt;
    19.       atomic_t     live;
    20.       int     nr_threads;
    21.       ...
    22.       ...
    23.       /* shared signal handling: */
    /* second pending-signal record, in addition to task_struct's own `pending` */
    24.       struct sigpending    shared_pending;
    25.       ...
    26. }

    /*
     * Pending-signal record: a list head plus a signal set.
     * __send_signal() appends sigqueue entries to `list` and sets the signal's
     * bit in `signal`.
     */
    27. struct sigpending {
    28.     struct list_head list;
    29.     sigset_t signal;
    30. };

    /*
     * Layout of the kernel signal set: an array of _NSIG_WORDS unsigned longs.
     * With _NSIG = 64 and _NSIG_BPW = 32 on i386 (64 otherwise), _NSIG_WORDS
     * is 2 on i386 and 1 elsewhere.
     * NOTE(review): _NSIG_BPW presumably means "bits per word" - confirm.
     */
    31. #define _NSIG        64

    32. #ifdef __i386__
    33. # define _NSIG_BPW    32
    34. #else
    35. # define _NSIG_BPW    64
    36. #endif

    37. #define _NSIG_WORDS    (_NSIG / _NSIG_BPW)

    38. typedef unsigned long old_sigset_t;        /* at least 32 bits */

    39. typedef struct {
    40.     unsigned long sig[_NSIG_WORDS];
    41. } sigset_t;

          task_struct中的pending,和signal->shared_pending都是记录挂起信号的数据结构,读到此处,你可能会迷惑,为何有两个这样的结构。这牵扯到thread与信号的一些问题,我们此处简化,就认为是一个就好,后面讲述线程与信号关系的时候,再展开。
          
        我们看到了,kill也好,tkill也罢,最终都走到了__send_signal。当然了kill系统调用根据pid的情况会分成多个分支,如pid > 0、pid = 0、pid = -1、pid < 0 && pid != -1,总之,我的图只绘制了pid > 0 的分支。tkill也有类似情况。
        那么kernel是怎么做到非可靠信号和可靠信号的这些差别的呢?

    /*
     * Generate signal `sig` for task `t`.
     *
     * The signal is recorded in t->signal->shared_pending when `group` is
     * non-zero, otherwise in t->pending. Unless prepare_signal() rejects it or
     * legacy_queue() reports it as already pending, a sigqueue entry carrying
     * the siginfo is allocated and appended to the pending list, the signal is
     * added to the pending set and complete_signal() is called.
     *
     * Returns 0, or -EAGAIN when no sigqueue entry could be allocated for a
     * signal >= SIGRTMIN whose si_code is not SI_USER.
     * The caller must hold t->sighand->siglock (asserted below).
     */
    1. static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
    2.             int group, int from_ancestor_ns)
    3. {
    4.     struct sigpending *pending;
    5.     struct sigqueue *q;
    6.     int override_rlimit;
    7.     int ret = 0, result;

    8.     assert_spin_locked(&t->sighand->siglock);

    9.     result = TRACE_SIGNAL_IGNORED;
    10.     if (!prepare_signal(sig, t,
    11.             from_ancestor_ns || (info == SEND_SIG_FORCED)))
    12.         goto ret;

    13.     pending = group ? &t->signal->shared_pending : &t->pending;
    14.     /*
    15.      * Short-circuit ignored signals and support queuing
    16.      * exactly one non-rt signal, so that we can get more
    17.      * detailed information about the cause of the signal.
    18.      */
    19.     result = TRACE_SIGNAL_ALREADY_PENDING;
    20.     if (legacy_queue(pending, sig)) //a signal below 32 that is already present in pending: return right away, i.e. ignore it
    21.         goto ret;

    22.     result = TRACE_SIGNAL_DELIVERED;
    23.     /*
    24.      * fast-pathed signals for kernel-internal things like SIGSTOP
    25.      * or SIGKILL.
    26.      */
    27.     if (info == SEND_SIG_FORCED)
    28.         goto out_set;

    29.     /*
    30.      * Real-time signals must be queued if sent by sigqueue, or
    31.      * some other real-time mechanism. It is implementation
    32.      * defined whether kill() does so. We attempt to do so, on
    33.      * the principle of least surprise, but since kill is not
    34.      * allowed to fail with EAGAIN when low on memory we just
    35.      * make sure at least one signal gets delivered and don't
    36.      * pass on the info struct.
    37.      */
    38.     if (sig < SIGRTMIN)
    39.         override_rlimit = (is_si_special(info) || info->si_code >= 0);
    40.     else
    41.         override_rlimit = 0;
          //allocate a sigqueue entry and link it into the corresponding pending list
    1.     q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
    2.         override_rlimit);
    3.     if (q) {
    4.         list_add_tail(&q->list, &pending->list);
    5.         switch ((unsigned long) info) {
    6.         case (unsigned long) SEND_SIG_NOINFO:
    7.             q->info.si_signo = sig;
    8.             q->info.si_errno = 0;
    9.             q->info.si_code = SI_USER;
    10.             q->info.si_pid = task_tgid_nr_ns(current,
    11.                             task_active_pid_ns(t));
    12.             q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
    13.             break;
    14.         case (unsigned long) SEND_SIG_PRIV:
    15.             q->info.si_signo = sig;
    16.             q->info.si_errno = 0;
    17.             q->info.si_code = SI_KERNEL;
    18.             q->info.si_pid = 0;
    19.             q->info.si_uid = 0;
    20.             break;
    21.         default:
    22.             copy_siginfo(&q->info, info);
    23.             if (from_ancestor_ns)
    24.                 q->info.si_pid = 0;
    25.             break;
    26.         }

    27.         userns_fixup_signal_uid(&q->info, t);

    28.     } else if (!is_si_special(info)) {
    29.         if (sig >= SIGRTMIN && info->si_code != SI_USER) {
    30.             /*
    31.              * Queue overflow, abort. We may abort if the
    32.              * signal was rt and sent by user using something
    33.              * other than kill().
    34.              */
    35.             result = TRACE_SIGNAL_OVERFLOW_FAIL;
    36.             ret = -EAGAIN;
    37.             goto ret;
    38.         } else {
    39.             /*
    40.              * This is a silent loss of information. We still
    41.              * send the signal, but the *info bits are lost.
    42.              */
    43.             result = TRACE_SIGNAL_LOSE_INFO;
    44.         }
    45.     }

    46. out_set:
    47.     signalfd_notify(t, sig);
    48.     sigaddset(&pending->signal, sig);  //add the signal to the pending bitmap
    49.     complete_signal(sig, t, group);
    50. ret:
    51.     trace_signal_generate(sig, info, t, group, result);
    52.     return ret;
    53. }

    54. static inline int legacy_queue(struct sigpending *signals, int sig)
    55. {
    56.     return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); //是不可靠信号,并且该信号已经存在挂起信号,

        那么15408的限制是在哪里呢?在__sigqueue_alloc 里面。

    /*
     * Allocate a sigqueue entry for signal `sig`, charged to the user that
     * owns task `t`.
     *
     * The user's sigpending counter is incremented first. The entry is only
     * allocated when `override_rlimit` is set or the counter does not exceed
     * the task's RLIMIT_SIGPENDING; otherwise print_dropped_signal() is called.
     *
     * Returns the initialised entry, or NULL when nothing was allocated; in
     * that case the counter increment and the uid reference are undone.
     */
    1. static struct sigqueue *
    2. __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
    3. {
    4.     struct sigqueue *q = NULL;
    5.     struct user_struct *user;

    6.     /*
    7.      * Protect access to @t credentials. This can go away when all
    8.      * callers hold rcu read lock.
    9.      */
    10.     rcu_read_lock();
    11.     user = get_uid(__task_cred(t)->user);
    12.     atomic_inc(&user->sigpending);    //counter + 1
    13.     rcu_read_unlock();

    14.     if (override_rlimit ||
    15.      atomic_read(&user->sigpending) <=
    16.             task_rlimit(t, RLIMIT_SIGPENDING)) {
    17.         q = kmem_cache_alloc(sigqueue_cachep, flags);
    18.     } else {
    19.         print_dropped_signal(sig);
    20.     }

    21.     if (unlikely(q == NULL)) {
    22.         atomic_dec(&user->sigpending);
    23.         free_uid(user);
    24.     } else {
    25.         INIT_LIST_HEAD(&q->list);
    26.         q->flags = 0;
    27.         q->user = user;
    28.     }

    29.     return q;
    30. }

         我们看到,legacy_queue就是用来判断是否是非可靠信号(signo低于32),并且相同signo值已经存在在挂起信号之中,如果是,直接返回。
         而对于可靠信号,会分配一个sigqueue的结构,然后将sigqueue链入到sigpending结构中的链表中。从而就不会丢失信号。当然对pending信号的总数作了限制,限制最多不可超过15408.当然了这个值是可以修改的:
        


    参考文献:
    1 Linux programming interface
    2 深入理解linux内核
    3 linux kernel 3.8.0 内核源码 

  • 相关阅读:
    The Quad
    将OrCAD Capture CIS的设计文件(.dsn)导入到PADS Logic VX.2.3
    OrCAD Capture CIS 16.6 将版本16.6的设计文件另存为版本16.2的设计文件
    Eclipse IDE 添加jar包到Java工程中
    PADS Logic VX.2.3 修改软件界面语言
    切换Allegro PCB Editor
    Allegro PCB Design GXL (legacy) 将brd文件另存为低版本文件
    Allegro PCB Design GXL (legacy) 设置自动保存brd文件
    Could not create an acl object: Role '16'
    windows 下apache开启FastCGI
  • 原文地址:https://www.cnblogs.com/sky-heaven/p/6844543.html
Copyright © 2011-2022 走看看