zoukankan      html  css  js  c++  java
  • 记一次传递文件句柄引发的血案 (续)

    继《记一次传递文件句柄引发的血案》之后,这个 demo 又引发了一次血案,现记录如下。

    这次我是在 linux 上测试文件句柄的传递,linux 上并没有 STREAMS 系统,

    因此是采用 unix domain socket 的 sendmsg/recvmsg 中控制消息部分来传递句柄的。

    代码的主要修改部分集中于发送 fd 与接收 fd 处,一开始代码是这样的,运行良好。

    spipe_fd.c

      1 #define MAXLINE 128
      2 #define RIGHTSLEN CMSG_LEN(sizeof(int))
      3 #define CREDSLEN CMSG_LEN(sizeof(struct CREDSTRUCT))
      4 #define CONTROLLEN (RIGHTSLEN+CREDSLEN)
      5 
      6 int send_fd (int fd, int fd_to_send)
      7 {
      8     struct iovec iov[1]; 
      9     struct msghdr msg; 
     10     struct cmsghdr *cmptr = NULL; 
     11     char buf[2]; 
     12 
     13     iov[0].iov_base = buf; 
     14     iov[0].iov_len = 2; 
     15 
     16     msg.msg_iov = iov; 
     17     msg.msg_iovlen = 1; 
     18     msg.msg_name = NULL; 
     19     msg.msg_namelen = 0; 
     20     msg.msg_flags = 0; 
     21 
     22     if (fd_to_send < 0) {
     23         msg.msg_control = NULL; 
     24         msg.msg_controllen = 0; 
     25         buf[1] = -fd_to_send; 
     26         if (buf[1] == 0)
     27             buf[1] = 1; 
     28     } else {
     29         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
     30             fprintf (stderr, "malloc memory failed
    "); 
     31             return -1; 
     32         }
     33 
     34         msg.msg_control = cmptr; 
     35         msg.msg_controllen = CONTROLLEN; 
     36 
     37         cmptr->cmsg_level = SOL_SOCKET; 
     38         cmptr->cmsg_type = SCM_RIGHTS; 
     39         cmptr->cmsg_len = CONTROLLEN; 
     40 
     41         *(int *) CMSG_DATA(cmptr) = fd_to_send; 
     42         buf[1] = 0; 
     43     }
     44 
     45     buf[0] = 0; 
     46     if (sendmsg(fd, &msg, 0) != 2) {
     47         free (cmptr); 
     48         return -1; 
     49     }
     50 
     51     free (cmptr); 
     52     return 0; 
     53 }

    以上是发送句柄部分,重点位于 37-41 行,设置了控制消息的级别、类型、长度以及句柄的值。

    sendmsg 中的数据消息部分,用来兼容出错的场景(出错时可以提供一个-1~-255的错误码,及一段描述信息),关键信息位于控制部分。

    下面来看消息的接收:

     1 int recv_fd (int fd, uid_t *uidptr, ssize_t (*userfunc) (int, const void*, size_t))
     2 {
     3     struct cmsghdr *cmptr = NULL; 
     4     int newfd, nr, status; 
     5     char *ptr; 
     6     char buf[MAXLINE]; 
     7     struct iovec iov[1]; 
     8     struct msghdr msg; 
     9 
    10     status = -1; 
    11     newfd = -1; 
    12 
    13     for (;;) {
    14         iov[0].iov_base = buf; 
    15         iov[0].iov_len = sizeof (buf); 
    16 
    17         msg.msg_iov = iov; 
    18         msg.msg_iovlen = 1; 
    19         msg.msg_name = NULL; 
    20         msg.msg_namelen = 0; 
    21 
    22         if ((cmptr = malloc (CONTROLLEN)) == NULL) {
    23             fprintf (stderr, "malloc error
    "); 
    24             return -1; 
    25         }
    26 
    27         msg.msg_control = cmptr; 
    28         msg.msg_controllen = CONTROLLEN; 
    29 
    30         if ((nr = recvmsg (fd, &msg, 0)) < 0) { 
    31             fprintf (stderr, "recvmsg error
    "); 
    32             free (cmptr); 
    33             return -1; 
    34         } else if (nr == 0) {
    35             fprintf (stderr, "connection closed by server
    "); 
    36             free (cmptr); 
    37             return -1; 
    38         }
    39 
    40         for (ptr = buf; ptr < &buf[nr]; ) {
    41             if (*ptr ++ == 0) {
    42                 if (ptr != &buf[nr-1]) {
    43                     fprintf (stderr, "message format error"); 
    44                     free (cmptr); 
    45                     return -1; 
    46                 }
    47 
    48                 status = *ptr & 0xff; 
    49                 if (status == 0) {
    50                     if (msg.msg_controllen != CONTROLLEN) { 
    51                         fprintf (stderr, "status = 0 but no fd
    "); 
    52                         free (cmptr); 
    53                         return -1; 
    54                     }
    55 
    56                     newfd = *(int *) CMSG_DATA(cmptr); 
    57                 } else { 
    58                     newfd = -status; 
    59                 }
    60 
    61                 nr -= 2; 
    62             }
    63         }
    64 
    65         free(cmptr); 
    66         if (nr > 0 && (*userfunc)(STDERR_FILENO, buf, nr) != nr)
    67             return -1; 
    68 
    69         if (status >= 0)
    70             return newfd; 
    71     }
    72 
    73     return -1; 
    74 }

    接收部分的重点位于 56 行,这里取得了对方传递过来的文件句柄(注意不是简单的值传递!参考上篇文章)

    其它一些代码则用来处理出错信息,当出现错误时,调用 userfunc 打印错误信息 (用户一般传递 write) 。

    另外接口中 uidptr 参数并没有用,这个是为将来扩展预留的。

    使用之前的 demo (spipe_server.c / spipe_client.c)编译、运行,输出结果如下:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outliqA3i with fd 4
    seek to head
    send fd 4 to peer
    recv fd 3, position 0
    create temp file /tmp/inaLr30i with fd 4
    source: 3 7
    
    seek to head
    send fd 4
    recv fd 5 from peer, position 0
    10

    可以看到通过新的方式传递的文件句柄值也发生了变化(从 4 变为 3),且也需要对文件偏移进行重置,否则还会掉到之前文章说的那个坑里。

    问题出现在增加一些代码来传递发送进程凭证(如uid)时,此时发送方需要传递两个控制子消息(分别表示句柄与凭证),接收方也需要处理两个子消息。

    新的发送代码如下:

     1 #define MAXLINE 128
     2 #if defined(SCM_CREDS) // on BSD
     3 #define CREDSTRUCT cmsgcred
     4 #define CR_UID cmcred_uid
     5 #define CREDOPT LOCAL_PEERCRED
     6 #define SCM_CREDTYPE SCM_CREDS
     7 #elif defined(SCM_CREDENTIALS)  // on linux
     8 #define CREDSTRUCT ucred
     9 #define CR_UID uid
    10 #define CREDOPT SO_PASSCRED
    11 #define SCM_CREDTYPE SCM_CREDENTIALS
    12 #else
    13 #error passing credentials is unsupported!
    14 #endif
    15 
    16 #define RIGHTSLEN CMSG_LEN(sizeof(int))
    17 #define CREDSLEN CMSG_LEN(sizeof(struct CREDSTRUCT))
    18 #define CONTROLLEN (RIGHTSLEN+CREDSLEN)
    19 
    20 
    21 int send_fd (int fd, int fd_to_send)
    22 {
    23     struct iovec iov[1]; 
    24     struct msghdr msg; 
    25     struct cmsghdr *cmptr = NULL; 
    26     char buf[2]; 
    27     struct CREDSTRUCT *credp; 
    28     struct cmsghdr *cmp; 
    29 
    30     iov[0].iov_base = buf; 
    31     iov[0].iov_len = 2; 
    32 
    33     msg.msg_iov = iov; 
    34     msg.msg_iovlen = 1; 
    35     msg.msg_name = NULL; 
    36     msg.msg_namelen = 0; 
    37     msg.msg_flags = 0; 
    38 
    39     if (fd_to_send < 0) {
    40         msg.msg_control = NULL; 
    41         msg.msg_controllen = 0; 
    42         buf[1] = -fd_to_send; 
    43         if (buf[1] == 0)
    44             buf[1] = 1; 
    45     } else {
    46         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
    47             fprintf (stderr, "malloc memory failed
    "); 
    48             return -1; 
    49         }
    50 
    51         msg.msg_control = cmptr; 
    52         msg.msg_controllen = CONTROLLEN; 
    53 
    54         cmp = cmptr; 
    55         cmp->cmsg_level = SOL_SOCKET; 
    56         cmp->cmsg_type = SCM_RIGHTS; 
    57         cmp->cmsg_len = RIGHTSLEN; 
    58         *(int *) CMSG_DATA(cmp) = fd_to_send; 
    59 
    60         cmp = CMSG_NXTHDR(&msg, cmp); 
    61         cmp->cmsg_level = SOL_SOCKET; 
    62         cmp->cmsg_type = SCM_CREDTYPE; 
    63         cmp->cmsg_len = CREDSLEN; 
    64         credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
    65 
    66 #  if defined(SCM_CREDENTIALS)
    67         // only linux need to set members of this struct !
    68         credp->uid = getuid (); 
    69         credp->gid = getegid (); 
    70         credp->pid = getpid (); 
    71 #  endif
    72         buf[1] = 0; 
    73     }
    74 
    75     buf[0] = 0; 
    76     if (sendmsg(fd, &msg, 0) != 2) {
    77         free (cmptr); 
    78         return -1; 
    79     }
    80 
    81     free (cmptr); 
    82     return 0; 
    83 }

    最开始的一些宏定义,用来区分 linux 与 bsd 上的一些细节;重点在 55-70 行,这两段代码分别设置了句柄与凭证两个子消息。

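    然后控制消息的大小 CONTROLLEN 由两部分消息的长度(RIGHTSLEN 与 CREDSLEN)累加得到,分配的内存也是这么大。(注意:严格来说,含多个子消息的缓冲区大小应当用 CMSG_SPACE 累加,因为它包含了对齐填充;这里用 CMSG_LEN 累加,只是因为本机上 16 与 24 恰好都已对齐才没有问题,在对齐粒度更大的平台上缓冲区会偏小。)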

    再来看接收部分:

      1 int recv_fd (int fd, uid_t *uidptr, ssize_t (*userfunc) (int, const void*, size_t))
      2 {
      3     struct cmsghdr *cmptr = NULL; 
      4 
      5     int newfd, nr, status; 
      6     char *ptr; 
      7     char buf[MAXLINE]; 
      8     struct iovec iov[1]; 
      9     struct msghdr msg; 
     10 
     11     status = -1; 
     12     newfd = -1; 
     13 
     14     const int on = -1; 
     15     struct cmsghdr *cmp; 
     16     struct CREDSTRUCT *credp; 
     17     if (setsockopt (fd, SOL_SOCKET, CREDOPT, &on, sizeof(int)) < 0) {
     18         fprintf (stderr, "setsockopt for %d failed
    ", CREDOPT); 
     19         return -1; 
     20     }
     21 
     22     for (;;) {
     23         iov[0].iov_base = buf; 
     24         iov[0].iov_len = sizeof (buf); 
     25 
     26         msg.msg_iov = iov; 
     27         msg.msg_iovlen = 1; 
     28         msg.msg_name = NULL; 
     29         msg.msg_namelen = 0; 
     30 
     31         if ((cmptr = malloc (CONTROLLEN)) == NULL) {
     32             fprintf (stderr, "malloc error
    "); 
     33             return -1; 
     34         }
     35 
     36         msg.msg_control = cmptr; 
     37         msg.msg_controllen = CONTROLLEN; 
     38 
     39         if ((nr = recvmsg (fd, &msg, 0)) < 0) { 
     40             fprintf (stderr, "recvmsg error
    "); 
     41             free (cmptr); 
     42             return -1; 
     43         } else if (nr == 0) {
     44             fprintf (stderr, "connection closed by server
    "); 
     45             free (cmptr); 
     46             return -1; 
     47         }
     48 
     49         for (ptr = buf; ptr < &buf[nr]; ) {
     50             if (*ptr ++ == 0) {
     51                 if (ptr != &buf[nr-1]) {
     52                     fprintf (stderr, "message format error"); 
     53                     free (cmptr); 
     54                     return -1; 
     55                 }
     56 
     57                 status = *ptr & 0xff; 
     58                 if (status == 0) {
     59                     if (msg.msg_controllen != CONTROLLEN) { 
     60                         fprintf (stderr, "status = 0 but no fd
    "); 
     61                         free (cmptr); 
     62                         return -1; 
     63                     }
     64 
     65                     for (cmp = CMSG_FIRSTHDR(&msg); cmp != NULL; cmp = CMSG_NXTHDR(&msg, cmp)) { 
     66                         if (cmp->cmsg_level != SOL_SOCKET) {
     67                             fprintf (stderr, "ignore unknown socket level %d
    ", cmp->cmsg_level); 
     68                             continue; 
     69                         }
     70 
     71                         switch (cmp->cmsg_type) {
     72                             case SCM_RIGHTS:
     73                                 newfd = *(int *) CMSG_DATA(cmp); 
     74                                 break; 
     75                             case SCM_CREDTYPE:
     76                                 credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
     77                                 *uidptr = credp->CR_UID; 
     78                                 break; 
     79                             default:
     80                                 fprintf (stderr, "ignore unknown msg type %d
    ", cmp->cmsg_type); 
     81                                 break; 
     82                         }
     83                     }
     84                 } else { 
     85                     newfd = -status; 
     86                 }
     87 
     88                 nr -= 2; 
     89             }
     90         }
     91 
     92         free(cmptr); 
     93         if (nr > 0 && (*userfunc)(STDERR_FILENO, buf, nr) != nr)
     94             return -1; 
     95 
     96         if (status >= 0)
     97             return newfd; 
     98     }
     99 
    100     return -1; 
    101 }

    重点分为两个部分:

    14-20 行,设置 unix domain socket 可以接收凭证信息;

    65-83 行,分别读取控制消息中的句柄与凭证信息,这里我们取了发送进程的 uid 信息作为凭证返回给上层调用者;

    与发送消息类似,这里使用系统提供的 CMSG_FIRSTHDR、CMSG_NXTHDR 在控制消息中遍历各个子部分。

    重新编译、运行 demo,却发现出错了:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outgQY1Y4 with fd 4
    seek to head
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/invVgKW4 with fd 4
    source: 3 7
    
    seek to head
    connection closed by server
    recv fd from peer failed, error -1
    

    从输出日志看,第一次从 server 发往 client 的句柄及凭证传递是成功的(输出的第 7 行),之后 client 处理完消息回传时,就出错了。

    首先定位出错代码位置,在 client 回传这里 (send_fd),加入一些日志:

     1         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
     2             fprintf (stderr, "malloc memory failed
    "); 
     3             return -1; 
     4         }
     5 
     6         msg.msg_control = cmptr; 
     7         msg.msg_controllen = CONTROLLEN; 
     8 
     9         cmp = cmptr; 
    10         cmp->cmsg_level = SOL_SOCKET; 
    11         cmp->cmsg_type = SCM_RIGHTS; 
    12         cmp->cmsg_len = RIGHTSLEN; 
    13         *(int *) CMSG_DATA(cmp) = fd_to_send; 
    14         fprintf (stderr, "add fd with len %d
    ", RIGHTSLEN); 
    15 
    16         cmp = CMSG_NXTHDR(&msg, cmp); 
    17         cmp->cmsg_level = SOL_SOCKET; 
    18         cmp->cmsg_type = SCM_CREDTYPE; 
    19         cmp->cmsg_len = CREDSLEN; 
    20         credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
    21         fprintf (stderr, "add credential with len %d
    ", CREDSLEN); 
    22 
    23 #  if defined(SCM_CREDENTIALS)
    24         // only linux need to set members of this struct !
    25         credp->uid = getuid (); 
    26         credp->gid = getegid (); 
    27         credp->pid = getpid (); 
    28         fprintf (stderr, "set uid %d, gid %d, pid %d
    ", credp->uid, credp->gid, credp->pid);
    29 #  endif
    30         buf[1] = 0; 

    新加入的输出日志是第 14、21、28 行(原文中标黄),再次编译运行:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outivt2Og with fd 4
    seek to head
    add fd with len 16
    add credential with len 24
    set uid 500, gid 500, pid 12071
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/inHqRwMg with fd 4
    source: 3 7
    
    seek to head
    add fd with len 16
    connection closed by server
    recv fd from peer failed, error -1
    

    可以看到,第一次传递时,这三条日志全都正确输出了,而回传时,只输出了第一条日志。

    所以明显是在第一条日志与第二条日志之间的代码出了问题。左看右看,看不出这块有什么问题,难道系统提供的 CMSG_NXTHDR 会出错?

    这里再加两条日志(第 15、18 行):

     1         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
     2             fprintf (stderr, "malloc memory failed
    "); 
     3             return -1; 
     4         }
     5 
     6         msg.msg_control = cmptr; 
     7         msg.msg_controllen = CONTROLLEN; 
     8 
     9         cmp = cmptr; 
    10         cmp->cmsg_level = SOL_SOCKET; 
    11         cmp->cmsg_type = SCM_RIGHTS; 
    12         cmp->cmsg_len = RIGHTSLEN; 
    13         *(int *) CMSG_DATA(cmp) = fd_to_send; 
    14         fprintf (stderr, "add fd with len %d
    ", RIGHTSLEN); 
    15         fprintf (stderr, "cmsghdr = %d, cmsglen = %d, after align = %d, control len = %d
    ", sizeof(struct cmsghdr), CREDSLEN, CMSG_ALIGN(CREDSLEN), CONTROLLEN); 
    16 
    17         cmp = CMSG_NXTHDR(&msg, cmp); 
    18         fprintf (stderr, "cmp = %p
    ", cmp); 
    19         cmp->cmsg_level = SOL_SOCKET; 
    20         cmp->cmsg_type = SCM_CREDTYPE; 
    21         cmp->cmsg_len = CREDSLEN; 
    22         credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
    23         fprintf (stderr, "add credential with len %d
    ", CREDSLEN); 
    24 
    25 #  if defined(SCM_CREDENTIALS)
    26         // only linux need to set members of this struct !
    27         credp->uid = getuid (); 
    28         credp->gid = getegid (); 
    29         credp->pid = getpid (); 
    30         fprintf (stderr, "set uid %d, gid %d, pid %d
    ", credp->uid, credp->gid, credp->pid);
    31 #  endif
    32         buf[1] = 0; 

    第二条日志是主要怀疑的地方,看指针是否为空;第一条日志则是怀疑块大小计算有误,导致分配的内存不够大,指针递增时出现了范围错误,所以这里打印各种长度做验证。

    再次运行后,又多了一些输出:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/out7UgSYZ with fd 4
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    cmp = 0x9ded018
    add credential with len 24
    set uid 500, gid 500, pid 12100
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/inC3nyWZ with fd 4
    source: 3 7
    
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    cmp = (nil)
    connection closed by server
    recv fd from peer failed, error -1
    

    神奇的地方出现了,同样的代码,相同的尺寸,第一次指针正常;第二次就为空了!

    崩溃点找到了,但是还是一头雾水,看起来数据块都对齐了,计算也没毛病,难道是这个系统提供的宏 (CMSG_NXTHDR) 出问题了吗?

    翻看头文件,找到这一段的定义 (我所在的系统,位于 /usr/include/bits/socket.h (L311)):

     1 __EXTERN_INLINE struct cmsghdr *
     2 __NTH (__cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg))
     3 {
     4   if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr))
     5     /* The kernel header does this so there may be a reason.  */
     6     return 0;
     7 
     8   __cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg
     9                    + CMSG_ALIGN (__cmsg->cmsg_len));
    10   if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control
    11                     + __mhdr->msg_controllen)
    12       || ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len)
    13       > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)))
    14     /* No more entries.  */
    15     return 0;
    16   return __cmsg;
    17 }

    这段 INLINE 函数主要包含三个判断,

    1)子消息长度小于消息头长度,返回 null;

    2)下一个子消息的消息头超出消息尾部,返回null;

    3)下一个子消息的消息体超出消息尾部,返回null;

    直接修改系统代码不方便,将这个函数拷贝到本地并重命名为 my_cmsg_nxthdr,在各个判断下面添加日志输出:

     1 struct cmsghdr *my_cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg)
     2 {
     3   if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr)) {
     4     /* The kernel header does this so there may be a reason.  */
     5     fprintf (stderr, "in step1
    "); 
     6     return 0;
     7   }
     8 
     9   fprintf (stderr, "%p: cmsg_len %u, cmsg_level %d, cmsg_type %d
    ", __cmsg, __cmsg->cmsg_len, __cmsg->cmsg_level, __cmsg->cmsg_type); 
    10   __cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len));
    11   if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)) {
    12       fprintf (stderr, "in step2
    "); 
    13       return 0; 
    14   }
    15 
    16   fprintf (stderr, "%p: cmsg_len %u, cmsg_level %d, cmsg_type %d
    ", __cmsg, __cmsg->cmsg_len, __cmsg->cmsg_level, __cmsg->cmsg_type); 
    17   if (((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len) > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen))) {
    18     /* No more entries.  */
    19       fprintf (stderr, "in step3
    "); 
    20       fprintf (stderr, "msg len %d, after align %d, msg control %d
    ", __cmsg->cmsg_len, CMSG_ALIGN(__cmsg->cmsg_len), __mhdr->msg_controllen); 
    21     return 0;
    22   }
    23 
    24   fprintf (stderr, "in final step
    "); 
    25   return __cmsg;
    26 }

    为了便于根据不同的判断条件输出日志,这里对判断条件进行了拆分。

    再次运行 demo,输出如下:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outh7NhIs with fd 4
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    0x9336008: cmsg_len 16, cmsg_level 1, cmsg_type 1
    0x9336018: cmsg_len 0, cmsg_level 0, cmsg_type 0
    in final step
    cmp = 0x9336018
    add credential with len 24
    set uid 500, gid 500, pid 12171
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/inoJMmKs with fd 4
    source: 3 7
    
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    0x904d008: cmsg_len 16, cmsg_level 1, cmsg_type 1
    0x904d018: cmsg_len 500, cmsg_level 500, cmsg_type 16
    in step3
    msg len 500, after align 500, msg control 40
    cmp = (nil)
    connection closed by server
    recv fd from peer failed, error -1
    

    原来是第三个判断出现了问题(输出的第 24 行 in step3)!

    消息总长度是 16 + 24 = 40,而这里的第二个子消息单个的长度达到 500,明显越界了。

    但是第二个子消息的长度明明是 24 呀,哪里跑出来的 500 呢?

    而且它的其它字段也明显不对,例如消息 level 也是 500,消息类型是 16 !

    初步可以确定是这块内存被弄乱了,而从前面打印的消息指针(0x904d008 与 0x904d018)看,分配的大小是没问题的,因此内存越界问题先排除掉;

    其次是我们设置好的内容……等等……我们好像还没有设置第二个子消息的内容!!

    ……

    垃圾数据!!

    ……

    malloc 之后没有清空的垃圾数据!!

    ……

    这也是第一次调用没问题而第二次掉坑里的原因:CMSG_NXTHDR 在返回下一个子消息之前,会先读取它的 cmsg_len 做越界检查,而此时这个字段还是 malloc 留下的旧数据,其内容随着系统内存的分配回收而存在一定的随机性!

    找到原因之后,修改就简单了,可以将 malloc 替换为 calloc,或者简单的加一句 memset 来清空内存:

     1         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
     2             fprintf (stderr, "malloc memory failed
    "); 
     3             return -1; 
     4         }
     5 
     6         // important on linux, garbage data may mess cmsg_len fields, 
     7         // and cause CMSG_NXTHDR return null on protection.
     8         memset (cmptr, 0, CONTROLLEN); 
     9         msg.msg_control = cmptr; 
    10         msg.msg_controllen = CONTROLLEN; 

    再次运行 demo,一切正常:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outqsTYkp with fd 4
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    0x814c008: cmsg_len 16, cmsg_level 1, cmsg_type 1
    0x814c018: cmsg_len 0, cmsg_level 0, cmsg_type 0
    in final step
    cmp = 0x814c018
    add credential with len 24
    set uid 500, gid 500, pid 12207
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/in3ntkip with fd 4
    source: 3 7
    
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    0x8389008: cmsg_len 16, cmsg_level 1, cmsg_type 1
    0x8389018: cmsg_len 0, cmsg_level 0, cmsg_type 0
    in final step
    cmp = 0x8389018
    add credential with len 24
    set uid 500, gid 500, pid 12208
    send fd 4
    recv fd 5, uid 500 from peer, position 0
    10
    

    通过这次 debug,找到了经典的 APUE 例子中的一个瑕疵 (随机性比较大,大师刚好没有遇到而已,可能你的机器也不复现)。

    不过回过头来看这个场景,也不能全算在 coder 身上,我感觉系统提供的这个 CMSG_NXTHDR 宏也颇成问题:

    如果我调用这个之前还没有设置下一个子消息,难道还不准我使用了么? 过多的检查反而弄巧成拙,总之一句话:差评! 哈哈~ (补充:Linux 的 cmsg(3) 手册其实有说明,构造要发送的控制消息缓冲区时应先将其清零,以保证 CMSG_NXTHDR 正常工作。)

  • 相关阅读:
    UI: Form editor && use CRectTracker to allow user drag/move/resize graphics
    如何设置共享文件夹
    log4net每天生成一个log文件
    自动属性/匿名方法/Lamda表达式
    ASPOSE.CELL
    转载:动态调用WebService(C#)
    按钮Button动态事件的名称
    ASPOE.WORD
    操作MSSQL服务还有测试是否连接
    操作MSSQL服务还有测试是否连接1
  • 原文地址:https://www.cnblogs.com/goodcitizen/p/12146060.html
Copyright © 2011-2022 走看看