zoukankan      html  css  js  c++  java
  • 记一次传递文件句柄引发的血案 (续)

    在《记一次传递文件句柄引发的血案》之后,这个 demo 又引发了一次血案,现录如下。

    这次我是在 linux 上测试文件句柄的传递,linux 上并没有 STREAMS 系统,

    因此是采用 unix domain socket 的 sendmsg/recvmsg 中控制消息部分来传递句柄的。

    代码的主要修改部分集中于发送 fd 与接收 fd 处,一开始代码是这样的,运行良好。

    spipe_fd.c

      1 #define MAXLINE 128
      2 #define RIGHTSLEN CMSG_LEN(sizeof(int))
      3 #define CREDSLEN CMSG_LEN(sizeof(struct CREDSTRUCT))
      4 #define CONTROLLEN (RIGHTSLEN+CREDSLEN)
      5 
      6 int send_fd (int fd, int fd_to_send)
      7 {
      8     struct iovec iov[1]; 
      9     struct msghdr msg; 
     10     struct cmsghdr *cmptr = NULL; 
     11     char buf[2]; 
     12 
     13     iov[0].iov_base = buf; 
     14     iov[0].iov_len = 2; 
     15 
     16     msg.msg_iov = iov; 
     17     msg.msg_iovlen = 1; 
     18     msg.msg_name = NULL; 
     19     msg.msg_namelen = 0; 
     20     msg.msg_flags = 0; 
     21 
     22     if (fd_to_send < 0) {
     23         msg.msg_control = NULL; 
     24         msg.msg_controllen = 0; 
     25         buf[1] = -fd_to_send; 
     26         if (buf[1] == 0)
     27             buf[1] = 1; 
     28     } else {
     29         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
     30             fprintf (stderr, "malloc memory failed
    "); 
     31             return -1; 
     32         }
     33 
     34         msg.msg_control = cmptr; 
     35         msg.msg_controllen = CONTROLLEN; 
     36 
     37         cmptr->cmsg_level = SOL_SOCKET; 
     38         cmptr->cmsg_type = SCM_RIGHTS; 
     39         cmptr->cmsg_len = CONTROLLEN; 
     40 
     41         *(int *) CMSG_DATA(cmptr) = fd_to_send; 
     42         buf[1] = 0; 
     43     }
     44 
     45     buf[0] = 0; 
     46     if (sendmsg(fd, &msg, 0) != 2) {
     47         free (cmptr); 
     48         return -1; 
     49     }
     50 
     51     free (cmptr); 
     52     return 0; 
     53 }

    以上是发送句柄部分,重点位于 37-41 行,设置了控制消息的类型、长度与句柄的值。

    sendmsg 中的数据消息部分,用来兼容出错的场景(出错时可以提供一个-1~-255的错误码,及一段描述信息),关键信息位于控制部分。

    下面来看消息的接收:

     1 int recv_fd (int fd, uid_t *uidptr, ssize_t (*userfunc) (int, const void*, size_t))
     2 {
     3     struct cmsghdr *cmptr = NULL; 
     4     int newfd, nr, status; 
     5     char *ptr; 
     6     char buf[MAXLINE]; 
     7     struct iovec iov[1]; 
     8     struct msghdr msg; 
     9 
    10     status = -1; 
    11     newfd = -1; 
    12 
    13     for (;;) {
    14         iov[0].iov_base = buf; 
    15         iov[0].iov_len = sizeof (buf); 
    16 
    17         msg.msg_iov = iov; 
    18         msg.msg_iovlen = 1; 
    19         msg.msg_name = NULL; 
    20         msg.msg_namelen = 0; 
    21 
    22         if ((cmptr = malloc (CONTROLLEN)) == NULL) {
    23             fprintf (stderr, "malloc error
    "); 
    24             return -1; 
    25         }
    26 
    27         msg.msg_control = cmptr; 
    28         msg.msg_controllen = CONTROLLEN; 
    29 
    30         if ((nr = recvmsg (fd, &msg, 0)) < 0) { 
    31             fprintf (stderr, "recvmsg error
    "); 
    32             free (cmptr); 
    33             return -1; 
    34         } else if (nr == 0) {
    35             fprintf (stderr, "connection closed by server
    "); 
    36             free (cmptr); 
    37             return -1; 
    38         }
    39 
    40         for (ptr = buf; ptr < &buf[nr]; ) {
    41             if (*ptr ++ == 0) {
    42                 if (ptr != &buf[nr-1]) {
    43                     fprintf (stderr, "message format error"); 
    44                     free (cmptr); 
    45                     return -1; 
    46                 }
    47 
    48                 status = *ptr & 0xff; 
    49                 if (status == 0) {
    50                     if (msg.msg_controllen != CONTROLLEN) { 
    51                         fprintf (stderr, "status = 0 but no fd
    "); 
    52                         free (cmptr); 
    53                         return -1; 
    54                     }
    55 
    56                     newfd = *(int *) CMSG_DATA(cmptr); 
    57                 } else { 
    58                     newfd = -status; 
    59                 }
    60 
    61                 nr -= 2; 
    62             }
    63         }
    64 
    65         free(cmptr); 
    66         if (nr > 0 && (*userfunc)(STDERR_FILENO, buf, nr) != nr)
    67             return -1; 
    68 
    69         if (status >= 0)
    70             return newfd; 
    71     }
    72 
    73     return -1; 
    74 }

    接收部分的重点位于 56 行,这里取得了对方传递过来的文件句柄(注意不是简单的值传递!参考上篇文章)

    其它一些代码则用来处理出错信息,当出现错误时,调用 userfunc 打印错误信息 (用户一般传递 write) 。

    另外接口中 uidptr 参数并没有用,这个是为将来扩展预留的。

    使用之前的 demo (spipe_server.c / spipe_client.c)编译、运行,输出结果如下:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outliqA3i with fd 4
    seek to head
    send fd 4 to peer
    recv fd 3, position 0
    create temp file /tmp/inaLr30i with fd 4
    source: 3 7
    
    seek to head
    send fd 4
    recv fd 5 from peer, position 0
    10

    可以看到通过新的方式传递的文件句柄值也发生了变化(从 4 变为 3),且也需要对文件偏移进行重置,否则还会掉到之前文章说的那个坑里。

    问题出现在增加一些代码来传递发送进程凭证(如uid)时,此时发送方需要传递两个控制子消息(分别表示句柄与凭证),接收方也需要处理两个子消息。

    新的发送代码如下:

     1 #define MAXLINE 128
     2 #if defined(SCM_CREDS) // on BSD
     3 #define CREDSTRUCT cmsgcred
     4 #define CR_UID cmcred_uid
     5 #define CREDOPT LOCAL_PEERCRED
     6 #define SCM_CREDTYPE SCM_CREDS
     7 #elif defined(SCM_CREDENTIALS)  // on linux
     8 #define CREDSTRUCT ucred
     9 #define CR_UID uid
    10 #define CREDOPT SO_PASSCRED
    11 #define SCM_CREDTYPE SCM_CREDENTIALS
    12 #else
    13 #error passing credentials is unsupported!
    14 #endif
    15 
    16 #define RIGHTSLEN CMSG_LEN(sizeof(int))
    17 #define CREDSLEN CMSG_LEN(sizeof(struct CREDSTRUCT))
    18 #define CONTROLLEN (RIGHTSLEN+CREDSLEN)
    19 
    20 
    21 int send_fd (int fd, int fd_to_send)
    22 {
    23     struct iovec iov[1]; 
    24     struct msghdr msg; 
    25     struct cmsghdr *cmptr = NULL; 
    26     char buf[2]; 
    27     struct CREDSTRUCT *credp; 
    28     struct cmsghdr *cmp; 
    29 
    30     iov[0].iov_base = buf; 
    31     iov[0].iov_len = 2; 
    32 
    33     msg.msg_iov = iov; 
    34     msg.msg_iovlen = 1; 
    35     msg.msg_name = NULL; 
    36     msg.msg_namelen = 0; 
    37     msg.msg_flags = 0; 
    38 
    39     if (fd_to_send < 0) {
    40         msg.msg_control = NULL; 
    41         msg.msg_controllen = 0; 
    42         buf[1] = -fd_to_send; 
    43         if (buf[1] == 0)
    44             buf[1] = 1; 
    45     } else {
    46         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
    47             fprintf (stderr, "malloc memory failed
    "); 
    48             return -1; 
    49         }
    50 
    51         msg.msg_control = cmptr; 
    52         msg.msg_controllen = CONTROLLEN; 
    53 
    54         cmp = cmptr; 
    55         cmp->cmsg_level = SOL_SOCKET; 
    56         cmp->cmsg_type = SCM_RIGHTS; 
    57         cmp->cmsg_len = RIGHTSLEN; 
    58         *(int *) CMSG_DATA(cmp) = fd_to_send; 
    59 
    60         cmp = CMSG_NXTHDR(&msg, cmp); 
    61         cmp->cmsg_level = SOL_SOCKET; 
    62         cmp->cmsg_type = SCM_CREDTYPE; 
    63         cmp->cmsg_len = CREDSLEN; 
    64         credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
    65 
    66 #  if defined(SCM_CREDENTIALS)
    67         // only linux need to set members of this struct !
    68         credp->uid = getuid (); 
    69         credp->gid = getegid (); 
    70         credp->pid = getpid (); 
    71 #  endif
    72         buf[1] = 0; 
    73     }
    74 
    75     buf[0] = 0; 
    76     if (sendmsg(fd, &msg, 0) != 2) {
    77         free (cmptr); 
    78         return -1; 
    79     }
    80 
    81     free (cmptr); 
    82     return 0; 
    83 }

    最开始的一些宏定义,是用来区分 linux 与 bsd 上一些细节,重点在 55-64 行,这两段代码分别设置了句柄与凭证。

    然后控制消息的大小 CONTROLLEN 由两部分消息的长度(RIGHTSLEN 与 CREDSLEN)累加得到,分配的内存也是这么大。

    再来看接收部分:

      1 int recv_fd (int fd, uid_t *uidptr, ssize_t (*userfunc) (int, const void*, size_t))
      2 {
      3     struct cmsghdr *cmptr = NULL; 
      4 
      5     int newfd, nr, status; 
      6     char *ptr; 
      7     char buf[MAXLINE]; 
      8     struct iovec iov[1]; 
      9     struct msghdr msg; 
     10 
     11     status = -1; 
     12     newfd = -1; 
     13 
     14     const int on = -1; 
     15     struct cmsghdr *cmp; 
     16     struct CREDSTRUCT *credp; 
     17     if (setsockopt (fd, SOL_SOCKET, CREDOPT, &on, sizeof(int)) < 0) {
     18         fprintf (stderr, "setsockopt for %d failed
    ", CREDOPT); 
     19         return -1; 
     20     }
     21 
     22     for (;;) {
     23         iov[0].iov_base = buf; 
     24         iov[0].iov_len = sizeof (buf); 
     25 
     26         msg.msg_iov = iov; 
     27         msg.msg_iovlen = 1; 
     28         msg.msg_name = NULL; 
     29         msg.msg_namelen = 0; 
     30 
     31         if ((cmptr = malloc (CONTROLLEN)) == NULL) {
     32             fprintf (stderr, "malloc error
    "); 
     33             return -1; 
     34         }
     35 
     36         msg.msg_control = cmptr; 
     37         msg.msg_controllen = CONTROLLEN; 
     38 
     39         if ((nr = recvmsg (fd, &msg, 0)) < 0) { 
     40             fprintf (stderr, "recvmsg error
    "); 
     41             free (cmptr); 
     42             return -1; 
     43         } else if (nr == 0) {
     44             fprintf (stderr, "connection closed by server
    "); 
     45             free (cmptr); 
     46             return -1; 
     47         }
     48 
     49         for (ptr = buf; ptr < &buf[nr]; ) {
     50             if (*ptr ++ == 0) {
     51                 if (ptr != &buf[nr-1]) {
     52                     fprintf (stderr, "message format error"); 
     53                     free (cmptr); 
     54                     return -1; 
     55                 }
     56 
     57                 status = *ptr & 0xff; 
     58                 if (status == 0) {
     59                     if (msg.msg_controllen != CONTROLLEN) { 
     60                         fprintf (stderr, "status = 0 but no fd
    "); 
     61                         free (cmptr); 
     62                         return -1; 
     63                     }
     64 
     65                     for (cmp = CMSG_FIRSTHDR(&msg); cmp != NULL; cmp = CMSG_NXTHDR(&msg, cmp)) { 
     66                         if (cmp->cmsg_level != SOL_SOCKET) {
     67                             fprintf (stderr, "ignore unknown socket level %d
    ", cmp->cmsg_level); 
     68                             continue; 
     69                         }
     70 
     71                         switch (cmp->cmsg_type) {
     72                             case SCM_RIGHTS:
     73                                 newfd = *(int *) CMSG_DATA(cmp); 
     74                                 break; 
     75                             case SCM_CREDTYPE:
     76                                 credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
     77                                 *uidptr = credp->CR_UID; 
     78                                 break; 
     79                             default:
     80                                 fprintf (stderr, "ignore unknown msg type %d
    ", cmp->cmsg_type); 
     81                                 break; 
     82                         }
     83                     }
     84                 } else { 
     85                     newfd = -status; 
     86                 }
     87 
     88                 nr -= 2; 
     89             }
     90         }
     91 
     92         free(cmptr); 
     93         if (nr > 0 && (*userfunc)(STDERR_FILENO, buf, nr) != nr)
     94             return -1; 
     95 
     96         if (status >= 0)
     97             return newfd; 
     98     }
     99 
    100     return -1; 
    101 }

    重点分为两个部分:

    14-20 行,设置 unix domain socket 可以接收凭证信息;

    65-83 行,分别读取控制消息中的句柄与凭证信息,这里我们取了发送进程的 uid 信息作为凭证返回给上层调用者;

    与发送消息类似,这里使用系统提供的 CMSG_FIRSTHDR、CMSG_NXTHDR 在控制消息中遍历各个子部分。

    重新编译、运行 demo,却发现出错了:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outgQY1Y4 with fd 4
    seek to head
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/invVgKW4 with fd 4
    source: 3 7
    
    seek to head
    connection closed by server
    recv fd from peer failed, error -1
    

    从输出日志看,第一次从 server 发往 client 的句柄及凭证是可以的(输出的第 7 行),再之后 client 处理完消息回传时,就出错了。

    首先定位出错代码位置,在 client 回传这里 (send_fd),加入一些日志:

     1         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
     2             fprintf (stderr, "malloc memory failed
    "); 
     3             return -1; 
     4         }
     5 
     6         msg.msg_control = cmptr; 
     7         msg.msg_controllen = CONTROLLEN; 
     8 
     9         cmp = cmptr; 
    10         cmp->cmsg_level = SOL_SOCKET; 
    11         cmp->cmsg_type = SCM_RIGHTS; 
    12         cmp->cmsg_len = RIGHTSLEN; 
    13         *(int *) CMSG_DATA(cmp) = fd_to_send; 
    14         fprintf (stderr, "add fd with len %d
    ", RIGHTSLEN); 
    15 
    16         cmp = CMSG_NXTHDR(&msg, cmp); 
    17         cmp->cmsg_level = SOL_SOCKET; 
    18         cmp->cmsg_type = SCM_CREDTYPE; 
    19         cmp->cmsg_len = CREDSLEN; 
    20         credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
    21         fprintf (stderr, "add credential with len %d
    ", CREDSLEN); 
    22 
    23 #  if defined(SCM_CREDENTIALS)
    24         // only linux need to set members of this struct !
    25         credp->uid = getuid (); 
    26         credp->gid = getegid (); 
    27         credp->pid = getpid (); 
    28         fprintf (stderr, "set uid %d, gid %d, pid %d
    ", credp->uid, credp->gid, credp->pid);
    29 #  endif
    30         buf[1] = 0; 

    第 14、21、28 行(原文中标黄)是新加入的输出日志,再次编译运行:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outivt2Og with fd 4
    seek to head
    add fd with len 16
    add credential with len 24
    set uid 500, gid 500, pid 12071
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/inHqRwMg with fd 4
    source: 3 7
    
    seek to head
    add fd with len 16
    connection closed by server
    recv fd from peer failed, error -1
    

    可以看到,第一次传递时,这三条日志全都正确输出了,而回传时,只输出了第一条日志。

    所以明显是在第一条日志与第二条日志之间的代码出了问题。左看右看,看不出这块有什么问题,难道系统提供的 CMSG_NXTHDR 会出错?

    这边再加两条日志:

     1         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
     2             fprintf (stderr, "malloc memory failed
    "); 
     3             return -1; 
     4         }
     5 
     6         msg.msg_control = cmptr; 
     7         msg.msg_controllen = CONTROLLEN; 
     8 
     9         cmp = cmptr; 
    10         cmp->cmsg_level = SOL_SOCKET; 
    11         cmp->cmsg_type = SCM_RIGHTS; 
    12         cmp->cmsg_len = RIGHTSLEN; 
    13         *(int *) CMSG_DATA(cmp) = fd_to_send; 
    14         fprintf (stderr, "add fd with len %d
    ", RIGHTSLEN); 
    15         fprintf (stderr, "cmsghdr = %d, cmsglen = %d, after align = %d, control len = %d
    ", sizeof(struct cmsghdr), CREDSLEN, CMSG_ALIGN(CREDSLEN), CONTROLLEN); 
    16 
    17         cmp = CMSG_NXTHDR(&msg, cmp); 
    18         fprintf (stderr, "cmp = %p
    ", cmp); 
    19         cmp->cmsg_level = SOL_SOCKET; 
    20         cmp->cmsg_type = SCM_CREDTYPE; 
    21         cmp->cmsg_len = CREDSLEN; 
    22         credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 
    23         fprintf (stderr, "add credential with len %d
    ", CREDSLEN); 
    24 
    25 #  if defined(SCM_CREDENTIALS)
    26         // only linux need to set members of this struct !
    27         credp->uid = getuid (); 
    28         credp->gid = getegid (); 
    29         credp->pid = getpid (); 
    30         fprintf (stderr, "set uid %d, gid %d, pid %d
    ", credp->uid, credp->gid, credp->pid);
    31 #  endif
    32         buf[1] = 0; 

    第二条日志是主要怀疑的地方,看指针是否为空;第一条日志则是怀疑块大小计算有误,导致分配的内存不够大,指针递增时出现了范围错误,所以这里打印各种长度做验证。

    再次运行后,又多了一些输出:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/out7UgSYZ with fd 4
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    cmp = 0x9ded018
    add credential with len 24
    set uid 500, gid 500, pid 12100
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/inC3nyWZ with fd 4
    source: 3 7
    
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    cmp = (nil)
    connection closed by server
    recv fd from peer failed, error -1
    

    神奇的地方出现了,同样的代码,相同的尺寸,第一次指针正常;第二次就为空了!

    崩溃点找到了,但是还是一头雾水,看起来数据块都对齐了,计算也没毛病,难道是这个系统提供的宏 (CMSG_NXTHDR) 出问题了吗?

    翻看头文件,找到这一段的定义 (我所在的系统,位于 /usr/include/bits/socket.h (L311)):

     1 __EXTERN_INLINE struct cmsghdr *
     2 __NTH (__cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg))
     3 {
     4   if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr))
     5     /* The kernel header does this so there may be a reason.  */
     6     return 0;
     7 
     8   __cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg
     9                    + CMSG_ALIGN (__cmsg->cmsg_len));
    10   if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control
    11                     + __mhdr->msg_controllen)
    12       || ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len)
    13       > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)))
    14     /* No more entries.  */
    15     return 0;
    16   return __cmsg;
    17 }

    这段 INLINE 函数主要包含三个判断,

    1)子消息长度小于消息头长度,返回 null;

    2)下一个子消息的消息头超出消息尾部,返回null;

    3)下一个子消息的消息体超出消息尾部,返回null;

    直接修改系统代码不方便,将这个函数拷贝到本地并重命名为 my_cmsg_nxthdr,在各个判断下面添加日志输出:

     1 struct cmsghdr *my_cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg)
     2 {
     3   if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr)) {
     4     /* The kernel header does this so there may be a reason.  */
     5     fprintf (stderr, "in step1
    "); 
     6     return 0;
     7   }
     8 
     9   fprintf (stderr, "%p: cmsg_len %u, cmsg_level %d, cmsg_type %d
    ", __cmsg, __cmsg->cmsg_len, __cmsg->cmsg_level, __cmsg->cmsg_type); 
    10   __cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len));
    11   if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)) {
    12       fprintf (stderr, "in step2
    "); 
    13       return 0; 
    14   }
    15 
    16   fprintf (stderr, "%p: cmsg_len %u, cmsg_level %d, cmsg_type %d
    ", __cmsg, __cmsg->cmsg_len, __cmsg->cmsg_level, __cmsg->cmsg_type); 
    17   if (((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len) > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen))) {
    18     /* No more entries.  */
    19       fprintf (stderr, "in step3
    "); 
    20       fprintf (stderr, "msg len %d, after align %d, msg control %d
    ", __cmsg->cmsg_len, CMSG_ALIGN(__cmsg->cmsg_len), __mhdr->msg_controllen); 
    21     return 0;
    22   }
    23 
    24   fprintf (stderr, "in final step
    "); 
    25   return __cmsg;
    26 }

    为了便于根据不同的判断条件输出日志,这里对判断条件进行了拆分。

    再次运行 demo,输出如下:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outh7NhIs with fd 4
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    0x9336008: cmsg_len 16, cmsg_level 1, cmsg_type 1
    0x9336018: cmsg_len 0, cmsg_level 0, cmsg_type 0
    in final step
    cmp = 0x9336018
    add credential with len 24
    set uid 500, gid 500, pid 12171
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/inoJMmKs with fd 4
    source: 3 7
    
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    0x904d008: cmsg_len 16, cmsg_level 1, cmsg_type 1
    0x904d018: cmsg_len 500, cmsg_level 500, cmsg_type 16
    in step3
    msg len 500, after align 500, msg control 40
    cmp = (nil)
    connection closed by server
    recv fd from peer failed, error -1
    

    原来是第三个判断出现了问题(见上面输出的第 24 行 "in step3")!

    消息总长度是 16 + 24 = 40,而这里的第二个子消息单个的长度达到 500,明显越界了。

    但是第二个子消息的长度明明是 24 呀,哪里跑出来的 500 呢?

    而且它的其它字段也明显不对,例如消息 level 也是 500,消息类型是 16 !

    初步可以确定是这块内存被弄乱了,而从前面打印的消息指针(0x904d008 与 0x904d018)看,分配的大小是没问题的,因此内存越界问题先排除掉;

    其次是我们设置好的内容……等等……我们好像还没有设置第二个子消息的内容!!

    ……

    malloc 之后没有清空的垃圾数据!

    ……

    这也是第一次调用没问题而第二次掉坑里的原因,随着系统内存的分配回收而存在一定的随机性!

    找到原因之后,修改就简单了,可以将 malloc 替换为 calloc,或者简单地加一句 memset 来清空内存:

     1         if ((cmptr = malloc(CONTROLLEN)) == NULL) {
     2             fprintf (stderr, "malloc memory failed
    "); 
     3             return -1; 
     4         }
     5 
     6         // important on linux, garbage data may mess cmsg_len fields, 
     7         // and cause CMSG_NXTHDR return null on protection.
     8         memset (cmptr, 0, CONTROLLEN); 
     9         msg.msg_control = cmptr; 
    10         msg.msg_controllen = CONTROLLEN; 

    再次运行 demo,一切正常:

    ./spipe_server ./spipe_client
    create pipe 3.4
    3 7
    create temp file /tmp/outqsTYkp with fd 4
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    0x814c008: cmsg_len 16, cmsg_level 1, cmsg_type 1
    0x814c018: cmsg_len 0, cmsg_level 0, cmsg_type 0
    in final step
    cmp = 0x814c018
    add credential with len 24
    set uid 500, gid 500, pid 12207
    send fd 4 to peer
    recv fd 3, uid 500, position 0
    create temp file /tmp/in3ntkip with fd 4
    source: 3 7
    
    seek to head
    add fd with len 16
    cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40
    0x8389008: cmsg_len 16, cmsg_level 1, cmsg_type 1
    0x8389018: cmsg_len 0, cmsg_level 0, cmsg_type 0
    in final step
    cmp = 0x8389018
    add credential with len 24
    set uid 500, gid 500, pid 12208
    send fd 4
    recv fd 5, uid 500 from peer, position 0
    10
    

    通过这次 debug,找到了经典的 APUE 例子中的一个瑕疵 (随机性比较大,大师刚好没有遇到而已,可能你的机器也不复现)。

    不过回过头来看这个场景,也不能全算在 coder 身上,我感觉系统提供的这个 CMSG_NXTHDR 宏也颇成问题:

    如果我调用这个之前还没有设置下一个子消息,难道还不准我使用了么? 过多的检查反而弄巧成拙,总之一句话:差评! 哈哈~

  • 相关阅读:
    SharePoint 2013 APP 开发示例 (六)服务端跨域访问 Web Service (REST API)
    麦咖啡导致电脑不能上网
    SharePoint 2013 Central Admin 不能打开
    SharePoint 2013 APP 开发示例 (五)跨域访问 Web Service (REST API)
    SharePoint 2013 APP 开发示例 系列
    synthesize(合成) keyword in IOS
    Git Cmd
    简单的正则匹配
    Dropbox
    SQL Server Replication
  • 原文地址:https://www.cnblogs.com/goodcitizen/p/the_continue_to_murder_caused_by_passing_file_handle.html
Copyright © 2011-2022 走看看