  • A crash analysis on linux 2.6.32-220

    A colleague was analyzing a crash and I joined in. The analysis is recorded below for anyone who runs into the same crash:

    crash> bt
    PID: 24632  TASK: ffff881723ce8080  CPU: 14  COMMAND: "30-00-SS"
     #0 [ffff881723cef6d0] machine_kexec at ffffffff8103244b
     #1 [ffff881723cef730] crash_kexec at ffffffff810bb0e2
     #2 [ffff881723cef800] oops_end at ffffffff814fe120
     #3 [ffff881723cef830] no_context at ffffffff810425db
     #4 [ffff881723cef880] __bad_area_nosemaphore at ffffffff81042865
     #5 [ffff881723cef8d0] bad_area at ffffffff8104298e
     #6 [ffff881723cef900] __do_page_fault at ffffffff810430c0
     #7 [ffff881723cefa20] do_page_fault at ffffffff8150014e
     #8 [ffff881723cefa50] page_fault at ffffffff814fd485
        [exception RIP: udp_send_skb+744]
        RIP: ffffffff81497358  RSP: ffff881723cefb08  RFLAGS: 00010202
        RAX: 000000008cfa0d02  RBX: ffff881439a092c0  RCX: 0000000000000000
        RDX: 000000000000005c  RSI: 0000000000000014  RDI: ffff881061d4b800
        RBP: ffff881723cefb48   R8: 000000004a00300a   R9: 0000000039a092c0
        R10: 0000000000000000  R11: 0000000000000000  R12: 000000000000005c
        R13: ffff8811f8e77100  R14: 0000000000000000  R15: ffff881061d4b824
        ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
     #9 [ffff881723cefb50] udp_sendmsg at ffffffff8149781d
    #10 [ffff881723cefc50] inet_sendmsg at ffffffff8149fc5a
    #11 [ffff881723cefc90] sock_sendmsg at ffffffff814261ca
    #12 [ffff881723cefe40] sys_sendto at ffffffff81426ae9
    #13 [ffff881723ceff80] system_call_fastpath at ffffffff8100b0d2
        RIP: 0000003e1580ee83  RSP: 00007ffb0c9339b8  RFLAGS: 00000202
        RAX: 000000000000002c  RBX: ffffffff8100b0d2  RCX: 0000000000000002
        RDX: 0000000000000054  RSI: 00007ffb0c934380  RDI: 0000000000000b0d
        RBP: 00007ffaf4175c80   R8: 00007ffb0c933fb0   R9: 000000000000001c
        R10: 0000000000000000  R11: 0000000000000293  R12: 00007ffb0c934258
        R13: 00007ffb0c93432c  R14: 000000000000e2ab  R15: 00007ffaf4d3a878
        ORIG_RAX: 000000000000002c  CS: 0033  SS: 002b
    crash>
    BUG: unable to handle kernel paging request at 000000008cfa0d76
    IP: [<ffffffff81497358>] udp_send_skb+0x2e8/0x3d0
    PGD be47ab067 PUD 0
    Oops: 0000 [#1] SMP
    last sysfs file: /sys/devices/system/cpu/online
    CPU 14
    Modules linked in: *** (list omitted)
    Pid: 24632, comm: 30-00-SS Tainted: G        W  ----------------   2.6.32-220.el6.x86_64 #1 To be filled by O.E.M. To be filled by O.E.M./To be filled by O.E.M.
    RIP: 0010:[<ffffffff81497358>]
     [<ffffffff81497358>] udp_send_skb+0x2e8/0x3d0
    RSP: 0018:ffff881723cefb08  EFLAGS: 00010202
    RAX: 000000008cfa0d02 RBX: ffff881439a092c0 RCX: 0000000000000000
    RDX: 000000000000005c RSI: 0000000000000014 RDI: ffff881061d4b800
    RBP: ffff881723cefb48 R08: 000000004a00300a R09: 0000000039a092c0
    R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000005c
    R13: ffff8811f8e77100 R14: 0000000000000000 R15: ffff881061d4b824
    FS:  00007ffb0c935700(0000) GS:ffff880c5a780000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 000000008cfa0d76 CR3: 0000000c10733000 CR4: 00000000000006e0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
    Process 30-00-SS (pid: 24632, threadinfo ffff881723cee000, task ffff881723ce8080)
    Stack:
     0000000000000000 5e2d320a2654dacc ffff880c94c6acc0 ffff8811f8e77100
    <0> 0000000000000000 000000004a00300a ffff881723cefe58 ffffffff81471f40
    <0> ffff881723cefc48 ffffffff8149781d ffff881723cefbc0 ffffffff00000040
    Call Trace:
     [<ffffffff81471f40>] ? ip_generic_getfrag+0x0/0xb0
     [<ffffffff8149781d>] udp_sendmsg+0x2ed/0x8f0
     [<ffffffff8149fc5a>] inet_sendmsg+0x4a/0xb0
     [<ffffffff814261ca>] sock_sendmsg+0x11a/0x150
     [<ffffffff810925e0>] ? autoremove_wake_function+0x0/0x40
     [<ffffffff810a4b2e>] ? futex_wake+0x10e/0x120
     [<ffffffff8100bdee>] ? reschedule_interrupt+0xe/0x20
     [<ffffffff8117f90a>] ? fget_light+0x7a/0x90
     [<ffffffff81426ae9>] sys_sendto+0x139/0x190
     [<ffffffff810d6a12>] ? audit_syscall_entry+0xc2/0x2b0
     [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b
    Code: 30 01 00 00 4e 8d 3c 3f 48 8b 44 07 18 44 89 fe 2b b3 d8 00 00 00 41 29 f4 48 85 c0 44 89 e2 0f 84 9e 00 00 00 66 0f 1f 44 00 00 <03> 48 74 40 0f 92 c7 2b 50 68 48 8b 00 40 0f b6 ff 8d 0c 0f 48
    RIP  [<ffffffff81497358>] udp_send_skb+0x2e8/0x3d0
     RSP <ffff881723cefb08>
    CR2: 000000008cfa0d76

    From the stack, user space entered the kernel through the sendto system call, and the fault happened while udp_send_skb was executing, at udp_send_skb+0x2e8/0x3d0:

    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/include/net/checksum.h: 64
    0xffffffff81497358 <udp_send_skb+744>:  add    0x74(%rax),%ecx---------------------------------------- faulting instruction

    From the source line, checksum.h:64 sits inside csum_add. Looking at the call chains, both udp4_hwcsum and udp_csum call csum_add, so we need to figure out which caller of csum_add hit the fault.
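
    For reference, csum_add on this kernel is the generic helper from include/net/checksum.h (quoted roughly from memory). When it is inlined, the addend is read straight from memory, which is why the faulting add 0x74(%rax),%ecx above dereferences the pointer that holds the addend:

    static inline __wsum csum_add(__wsum csum, __wsum addend)
    {
            u32 res = (__force u32)csum;
            res += (__force u32)addend;     /* the "add mem,%ecx" seen in the oops */
            return (__force __wsum)(res + (res < (__force u32)addend));
    }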
     
    The skb argument is the return value of the ip_make_skb call in udp_sendmsg:
    0xffffffff814977eb <udp_sendmsg+699>:   callq  0xffffffff81474820 <ip_make_skb>
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 764
    0xffffffff814977f0 <udp_sendmsg+704>:   test   %rax,%rax
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 763
    0xffffffff814977f3 <udp_sendmsg+707>:   mov    %eax,%r9d
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 764
    0xffffffff814977f6 <udp_sendmsg+710>:   je     0xffffffff81497a6d <udp_sendmsg+1341>
    0xffffffff814977fc <udp_sendmsg+716>:   cmp    $0xfffffffffffff000,%rax
    0xffffffff81497802 <udp_sendmsg+722>:   ja     0xffffffff81497a6d <udp_sendmsg+1341>
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 765
    0xffffffff81497808 <udp_sendmsg+728>:   movzwl -0xb2(%rbp),%edx
    0xffffffff8149780f <udp_sendmsg+735>:   mov    -0xb0(%rbp),%esi
    0xffffffff81497815 <udp_sendmsg+741>:   mov    %rax,%rdi
    0xffffffff81497818 <udp_sendmsg+744>:   callq  0xffffffff81497070 <udp_send_skb>

    rax holds the skb. It is never spilled to the stack, and right before the call to udp_send_skb it is copied into rdi, so rdi is the skb. After entering udp_send_skb:

    crash> dis -l udp_send_skb
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 519
    0xffffffff81497070 <udp_send_skb>:      push   %rbp
    0xffffffff81497071 <udp_send_skb+1>:    mov    %rsp,%rbp
    0xffffffff81497074 <udp_send_skb+4>:    push   %r15
    0xffffffff81497076 <udp_send_skb+6>:    push   %r14
    0xffffffff81497078 <udp_send_skb+8>:    push   %r13
    0xffffffff8149707a <udp_send_skb+10>:   push   %r12
    0xffffffff8149707c <udp_send_skb+12>:   push   %rbx
    0xffffffff8149707d <udp_send_skb+13>:   sub    $0x18,%rsp
    0xffffffff81497081 <udp_send_skb+17>:   nopl   0x0(%rax,%rax,1)
    0xffffffff81497086 <udp_send_skb+22>:   mov    %esi,-0x34(%rbp)
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/include/linux/skbuff.h: 1227
    0xffffffff81497089 <udp_send_skb+25>:   mov    0xbc(%rdi),%r12d------------------------------------- rdi is the skb here, so r12d is skb->transport_header
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 519
    0xffffffff81497090 <udp_send_skb+32>:   mov    %rdi,%rbx

    We can see that rdi is copied into rbx, and rbx is not modified again before the faulting instruction, so rbx holds the skb pointer: ffff881439a092c0. With the skb in hand we can trace the execution flow of udp_send_skb.

    crash> struct -x sk_buff.sk ffff881439a092c0
      sk = 0xffff8811f8e77100
    crash> struct udp_sock.pcflag 0xffff8811f8e77100
      pcflag = 0 '00'

    crash> struct sock.sk_no_check 0xffff8811f8e77100
    sk_no_check = 0

    crash> struct -x sk_buff.ip_summed ffff881439a092c0
    ip_summed = 0x3
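
    For reference, the values read out above map onto the following constants (quoted from memory, include/linux/skbuff.h and include/net/udp.h on this kernel):

    #define CHECKSUM_NONE           0
    #define CHECKSUM_UNNECESSARY    1
    #define CHECKSUM_COMPLETE       2
    #define CHECKSUM_PARTIAL        3    /* ip_summed = 0x3 above */

    #define UDP_CSUM_NOXMIT         1    /* sk_no_check = 0, so checksumming is not disabled */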

    So is_udplite is 0, and the checksum selection in udp_send_skb goes as follows:

        if (is_udplite)                   /*     UDP-Lite      */------------------------ not taken: is_udplite is 0
            csum = udplite_csum(skb);
    
    else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */-------- not taken either: UDP_CSUM_NOXMIT is 1 and sk_no_check is 0
    
            skb->ip_summed = CHECKSUM_NONE;
            goto send;
    
    } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */------------ taken: ip_summed is 3, i.e. CHECKSUM_PARTIAL
    
            udp4_hwcsum(skb, rt->rt_src, daddr);
            goto send;
    
        } else
            csum = udp_csum(skb);

    Let's look at the udp4_hwcsum flow:

    static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
    {
        struct udphdr *uh = udp_hdr(skb);
        struct sk_buff *frags = skb_shinfo(skb)->frag_list;
        int offset = skb_transport_offset(skb);
        int len = skb->len - offset;
        int hlen = len;
        __wsum csum = 0;
    
        if (!frags) {
            /*
             * Only one fragment on the socket.
             */
            skb->csum_start = skb_transport_header(skb) - skb->head;
            skb->csum_offset = offsetof(struct udphdr, check);
            uh->check = ~csum_tcpudp_magic(src, dst, len,
                               IPPROTO_UDP, 0);
        } else {
            /*
             * HW-checksum won't work as there are two or more
             * fragments on the socket so that all csums of sk_buffs
             * should be together
             */
            do {
                csum = csum_add(csum, frags->csum);
                hlen -= frags->len;
            } while ((frags = frags->next));
    
            csum = skb_checksum(skb, offset, hlen, csum);
            skb->ip_summed = CHECKSUM_NONE;
    
            uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
            if (uh->check == 0)
                uh->check = CSUM_MANGLED_0;
        }
    }

    We need to look at struct sk_buff *frags = skb_shinfo(skb)->frag_list;
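
    On x86_64 this kernel builds with NET_SKBUFF_DATA_USES_OFFSET, so skb->end is an offset from skb->head rather than a pointer, and the shared info area sits at skb->head + skb->end. That is exactly what the crash commands below compute (simplified from include/linux/skbuff.h):

    static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
    {
            return skb->head + skb->end;    /* end is an offset on 64-bit */
    }

    #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))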

    crash> struct -x sk_buff.head ffff881439a092c0
      head = 0xffff881061d4b800 "330,~350372177"
    crash> struct -x sk_buff.end ffff881439a092c0
      end = 0xc0
    crash> px 0xffff881061d4b800+0xc0
    $18 = 0xffff881061d4b8c0
    crash> struct skb_shared_info.frag_list^C
    crash> struct skb_shared_info.frag_list 0xffff881061d4b8c0
      frag_list = 0x8cfa0d02
    crash>
    crash> struct frag_list.next 0x8cfa0d02
    struct: invalid kernel virtual address: 0x8cfa0d02
    crash> struct -xo frag_list.next
    struct: invalid data structure reference: frag_list
    crash> struct -xo frag_list
    struct: invalid data structure reference: frag_list
    crash> struct -xo sk_buff.next
    struct sk_buff {
       [0x0] struct sk_buff *next;
    }
    crash> struct -xo sk_buff.csum
    struct sk_buff {
      [0x74]     __wsum csum;
    }
    crash> p 0x8cfa0d02+0x74
    $19 = 2365197686
    crash> px 0x8cfa0d02+0x74
    $20 = 0x8cfa0d76-------------------- this address is bogus, which is what caused the panic

    frags is 0x8cfa0d02. This address is bogus, and dereferencing it (frags->csum at offset 0x74, giving CR2 = 0x8cfa0d76) is what caused the panic.

    At this point we have to go back and analyze how the skb was assembled. frags is related to ip_options, and ip_options is related to ipc, so first let's recover ipc.

     #9 [ffff881723cefb50] udp_sendmsg at ffffffff8149781d
        ffff881723cefb58: ffff881723cefbc0 ffffffff00000040
        ffff881723cefb68: ffff881723cefbc8 ffffffff00000000
        ffff881723cefb78: ffff881723cefba8 ffff881100000000
        ffff881723cefb88: ffffffff8201f300 abe2880c94c6acc0
        ffff881723cefb98: 000000005e2d320a 0000000000000054
        ffff881723cefba8: 000000005e2d320a ffff880c641d7f40-------- the address of ipc.opt is ffff881723cefbb0 and its value is ffff880c641d7f40
        ffff881723cefbb8: ffff881723cefc00 0000000000000000
        ffff881723cefbc8: 0000000000000000 5e2d320a00000000
        ffff881723cefbd8: 000000004a00300a 0000000000000000
        ffff881723cefbe8: 0000000000000000 0000000000000000
        ffff881723cefbf8: abe22c6800000011 0000000000000000
        ffff881723cefc08: ffff881723cefd18 000000002654dacc
        ffff881723cefc18: ffff881723cefc98 ffff881723cefc98
        ffff881723cefc28: ffff881723cefe58 ffff881723cefc98
        ffff881723cefc38: 0000000000000054 ffff881723cefd88
        ffff881723cefc48: ffff881723cefc88 ffffffff8149fc5a
    #10 [ffff881723cefc50] inet_sendmsg at ffffffff8149fc5a
      627         ipc.opt = NULL;
        628         ipc.shtx.flags = 0;
    
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 627
    0xffffffff81497605 <udp_sendmsg+213>:   movq   $0x0,-0x98(%rbp)
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 628
    0xffffffff81497610 <udp_sendmsg+224>:   movb   $0x0,-0x90(%rbp)
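
    For reference, this matches the layout of struct ipcm_cookie (paraphrased from include/net/ip.h on this kernel): opt is the pointer member cleared at udp.c:627, so the movq $0x0,-0x98(%rbp) above is exactly ipc.opt = NULL and -0x98(%rbp) is the stack slot of ipc.opt:

    struct ipcm_cookie {
            __be32                  addr;
            int                     oif;
            struct ip_options       *opt;   /* cleared at udp.c:627 */
            union skb_shared_tx     shtx;   /* shtx.flags cleared at udp.c:628 */
    };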

    So the address of ipc.opt is 0xffff881723cefbb0. Since opt is itself a pointer, we can read the value stored at 0xffff881723cefbb0 on the stack, ffff880c641d7f40, to get the ip_options:

    crash> px 0xffff881723cefc48-0x98
    $17 = 0xffff881723cefbb0
    crash> struct ip_options ffff880c641d7f40
    struct ip_options {
      faddr = 1346456898,
      optlen = 0 '00',
      srr = 1 '01',
      rr = 0 '00',
      ts = 1 '01',
      is_strictroute = 1 '01',
      srr_is_hit = 1 '01',
      is_changed = 1 '01',
      rr_needaddr = 1 '01',
      ts_needtime = 1 '01',
      ts_needaddr = 1 '01',
      router_alert = 255 '377',
      cipso = 255 '377',
      __pad2 = 255 '377',
      __data = 0xffff880c641d7f4c "377377377377377377377377377377377377"
    }

    This content is obviously wrong: we never enable srr. For the meaning of these IP options, see "Understanding Linux Network Internals".

    crash> kmem ffff880c641d7f40
    CACHE            NAME                 OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE
    ffff880c2fc40100 size-64                   64    1099272   1195930  20270     4k
    SLAB              MEMORY            TOTAL  ALLOCATED  FREE
    ffff880c641d7000  ffff880c641d7140     59          9    50
    FREE / [ALLOCATED]
      [ffff880c641d7f40]

    This shows ipc.opt points at an allocated slab object. Either way, logically it should equal inet->opt, so let's see what inet->opt is; to get inet we have to work along the call stack.

    Starting from the beginning of the call chain:

    First let's recover the msg and socket pointers inside sys_sendto.

    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1706
    0xffffffff81426a3d <sys_sendto+141>:    movq   $0x0,-0x120(%rbp)

    根据源码:

       1700         sock = sockfd_lookup_light(fd, &err, &fput_needed);
       1701         if (!sock)
       1702                 goto out;
       1703
       1704         iov.iov_base = buff;
       1705         iov.iov_len = len;
       1706         msg.msg_name = NULL;
       1707         msg.msg_iov = &iov;
       1708         msg.msg_iovlen = 1;
       1709         msg.msg_control = NULL;
       1710         msg.msg_controllen = 0;
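
    For completeness, the code that follows the quoted lines (paraphrased from the same function; not shown in the excerpt above) copies the user-supplied destination address into an on-stack sockaddr_storage and hooks it into msg, which is why the msg_name recovered below points back into the sys_sendto stack frame and msg_namelen is 28:

        msg.msg_namelen = 0;
        if (addr) {
                err = move_addr_to_kernel(addr, addr_len,
                                          (struct sockaddr *)&address);
                if (err < 0)
                        goto out_put;
                msg.msg_name = (struct sockaddr *)&address;  /* on-stack sockaddr_storage */
                msg.msg_namelen = addr_len;                  /* 28 in this crash */
        }
        if (sock->file->f_flags & O_NONBLOCK)
                flags |= MSG_DONTWAIT;                       /* msg_flags = 64 below is MSG_DONTWAIT */
        msg.msg_flags = flags;
        err = sock_sendmsg(sock, &msg, len);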

    From the stack:

    #12 [ffff881723cefe40] sys_sendto at ffffffff81426ae9
        ffff881723cefe48: 0000000000000054 00007ffb0c934380
        ffff881723cefe58: ffff881723cefeb8 01ff880c0000001c------------------- ffff881723cefe58 is the address of struct msghdr msg
        ffff881723cefe68: ffff881723cefe98 0000000000000001
        ffff881723cefe78: 0000000000000000 0000000000000000
        ffff881723cefe88: 0000000000000040 0000000000016000
        ffff881723cefe98: 00007ffb0c934380 0000000000000054
        ffff881723cefea8: ffff88180f46c7c0 0000000000000001
        ffff881723cefeb8: 5e2d320aabe20002 0000000000000000
        ffff881723cefec8: 0000000000000000 0000000000000000
        ffff881723cefed8: 0000000000006038 0000000001d4e3b0
        ffff881723cefee8: ffff881723ceff78 ffff881723ce8638
        ffff881723cefef8: ffff881723ceff78 ffffffff810d6a12
        ffff881723ceff08: ffff881723ce8640 ffff88162c949540
        ffff881723ceff18: ffff881723ce8080 ffff88180deb4e48
        ffff881723ceff28: ffff88180deb4e48 ffff88180deb4e48
        ffff881723ceff38: ffff881723ceff78 000000002654dacc
        ffff881723ceff48: 00007ffb0c934090 00007ffb0c934380
        ffff881723ceff58: 00007ffaf4d3a878 000000000000e2ab
        ffff881723ceff68: 00007ffb0c93432c 00007ffb0c934258
        ffff881723ceff78: 00007ffaf4175c80 ffffffff8100b0d2--------------- ffff881723ceff78 is the rbp of the sys_sendto frame
    #13 [ffff881723ceff80] system_call_fastpath at ffffffff8100b0d2

    First locate the address of the msghdr on the stack:

    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1706
    0xffffffff81426a3d <sys_sendto+141>:    movq   $0x0,-0x120(%rbp)
    
    
    crash> px 0xffff881723ceff78-0x120
    $11 = 0xffff881723cefe58

    The code at line 1706 is msg.msg_name = NULL; and the corresponding instruction is movq $0x0,-0x120(%rbp), so rbp minus 0x120 is the address of msg, because msg_name is the first member of msg:

    crash> struct msghdr ffff881723cefe58
    struct msghdr {
      msg_name = 0xffff881723cefeb8,
      msg_namelen = 28,
      msg_iov = 0xffff881723cefe98,
      msg_iovlen = 1,
      msg_control = 0x0,
      msg_controllen = 0,
      msg_flags = 64
    }

    Since &iov is assigned to the msg_iov member, the address of iov is also known: 0xffff881723cefe98.

    crash> struct msghdr ffff881723cefe58
    struct msghdr {
      msg_name = 0xffff881723cefeb8,
      msg_namelen = 28,
      msg_iov = 0xffff881723cefe98,
      msg_iovlen = 1,
      msg_control = 0x0,
      msg_controllen = 0,-------------------------- this field is used in udp_sendmsg
      msg_flags = 64
    }
    crash> struct iovec 0xffff881723cefe98
    struct iovec {
      iov_base = 0x7ffb0c934380,
      iov_len = 84---------------- number of bytes in the user-space buffer to send
    }

    Next, the sock pointer. User space passes in an fd, and the kernel looks up the corresponding socket pointer from it:

    sock = sockfd_lookup_light(fd, &err, &fput_needed); so the socket * is the return value of sockfd_lookup_light.
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1700
    0xffffffff81426a04 <sys_sendto+84>:     callq  0xffffffff81424e80 <sockfd_lookup_light>
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1701
    0xffffffff81426a09 <sys_sendto+89>:     test   %rax,%rax
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1700
    0xffffffff81426a0c <sys_sendto+92>:     mov    %rax,%r14
    
    We can see that r14 holds the return value of sockfd_lookup_light and is not modified afterwards; when sock_sendmsg is called, r14 is saved on the stack (at -0x10(%rbp) in its prologue), so the sock pointer can be read back from the stack:
    
    
    crash> dis -l sock_sendmsg
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 576
    0xffffffff814260b0 <sock_sendmsg>:      push   %rbp
    0xffffffff814260b1 <sock_sendmsg+1>:    mov    %rsp,%rbp
    0xffffffff814260b4 <sock_sendmsg+4>:    sub    $0x1a0,%rsp
    0xffffffff814260bb <sock_sendmsg+11>:   mov    %rbx,-0x28(%rbp)
    0xffffffff814260bf <sock_sendmsg+15>:   mov    %r12,-0x20(%rbp)
    0xffffffff814260c3 <sock_sendmsg+19>:   mov    %r13,-0x18(%rbp)
    0xffffffff814260c7 <sock_sendmsg+23>:   mov    %r14,-0x10(%rbp)

    So the socket pointer can be recovered as:

    struct socket ffff88180357c400
    struct socket {
      state = SS_UNCONNECTED,
      type_begin = 0xffff88180357c404,
      type = 2,
      type_end = 0xffff88180357c408,
      flags = 0,
      fasync_list = 0x0,
      wait = {
        lock = {
          raw_lock = {
            slock = 2715525595
          }
        },
        task_list = {
          next = 0xffff8813bd0eb378,
          prev = 0xffff8813bd0eb378
        }
      },
      file = 0xffff88109b957ec0,
      sk = 0xffff8811f8e77100,
      ops = 0xffffffff81664840 <inet_dgram_ops>
    }

    From struct sock *sk = sock->sk; we easily get sk = 0xffff8811f8e77100. This pointer is also the base of sock, inet_sock, udp_sock and so on, because they all start at the same address. We can use the skb to check that this sk is consistent:

    crash> struct -x sk_buff.sk ffff881439a092c0
      sk = 0xffff8811f8e77100

    The sk recovered from the skb matches the sk we derived from sys_sendto.
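
    As background for why the same pointer value works as sock, inet_sock and udp_sock at once, the structures nest one inside the other (simplified from include/net/inet_sock.h and include/linux/udp.h; most members elided):

    struct inet_sock {
            struct sock             sk;     /* must be the first member */
            /* ... demultiplexing fields: daddr, rcv_saddr, dport, ... */
            struct ip_options       *opt;   /* the inet->opt we will compare against ipc.opt */
            /* ... */
    };

    struct udp_sock {
            struct inet_sock        inet;   /* inet_sock must be the first member */
            /* ... pcflag and other UDP-Lite fields ... */
    };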

    crash> struct sock 0xffff8811f8e77100
    struct sock {
      __sk_common = {
        {
          skc_node = {
            next = 0xffff881727a3b100,
            pprev = 0xffff881236f157c0
          },
          skc_nulls_node = {
            next = 0xffff881727a3b100,
            pprev = 0xffff881236f157c0
          }
        },
        skc_refcnt = {
          counter = 2
        },
        skc_hash = 26668,
        skc_family = 2,
        skc_state = 1 '01',
        skc_reuse = 0 '00',
        skc_bound_dev_if = 0,
        skc_bind_node = {
          next = 0x0,
          pprev = 0x0
        },
        skc_prot = 0xffffffff81b175c0 <udp_prot>,
        skc_net = 0xffffffff8201f300 <init_net>
      },
      flags_begin = 0xffff8811f8e77140,
      sk_shutdown = 0,
      sk_no_check = 0,
      sk_userlocks = 13,
      sk_protocol = 17,
      sk_type = 2,
      flags_end = 0xffff8811f8e77144,
      sk_rcvbuf = 8388608,
      sk_lock = {
        slock = {
          raw_lock = {
            slock = 1400263542
          }
        },
        owned = 0,
        wq = {
          lock = {
            raw_lock = {
              slock = 131074
            }
          },
          task_list = {
            next = 0xffff8811f8e77158,
            prev = 0xffff8811f8e77158
          }
        }
      },
      sk_backlog = {
        head = 0x0,
        tail = 0x0
      },
      sk_sleep = 0xffff88180357c418,
      sk_dst_cache = 0xffff8813b02e3800,
      sk_policy = {0x0, 0x0},
      sk_dst_lock = {
        raw_lock = {
          lock = 16777216
        }
      },
      sk_rmem_alloc = {
        counter = 552
      },
      sk_wmem_alloc = {
        counter = 425
      },
      sk_omem_alloc = {
        counter = 0
      },
      sk_sndbuf = 1048568,
      sk_receive_queue = {
        next = 0xffff880c94c6acc0,
        prev = 0xffff880c94c6acc0,
        qlen = 1,
        lock = {
          raw_lock = {
            slock = 309990010
          }
        }
      },
      sk_write_queue = {
        next = 0xffff8811f8e771c8,
        prev = 0xffff8811f8e771c8,
        qlen = 0,
        lock = {
          raw_lock = {
            slock = 0
          }
        }
      },
      sk_async_wait_queue = {
        next = 0xffff8811f8e771e0,
        prev = 0xffff8811f8e771e0,
        qlen = 0,
        lock = {
          raw_lock = {
            slock = 0
          }
        }
      },
      sk_wmem_queued = 0,
      sk_forward_alloc = 3544,
      sk_allocation = 208,
      sk_route_caps = 0,
      sk_gso_type = 0,
      sk_gso_max_size = 0,
      sk_rcvlowat = 1,
      sk_flags = 256,
      sk_lingertime = 0,
      sk_error_queue = {
        next = 0xffff8811f8e77228,
        prev = 0xffff8811f8e77228,
        qlen = 0,
        lock = {
          raw_lock = {
            slock = 0
          }
        }
      },
      sk_prot_creator = 0xffffffff81b175c0 <udp_prot>,
      sk_callback_lock = {
        raw_lock = {
          lock = 16777216
        }
      },
      sk_err = 0,
      sk_err_soft = 0,
      sk_drops = {
        counter = 0
      },
      sk_ack_backlog = 0,
      sk_max_ack_backlog = 0,
      sk_priority = 0,
      sk_peercred = {
        pid = 0,
        uid = 4294967295,
        gid = 4294967295
      },
      sk_rcvtimeo = 9223372036854775807,
      sk_sndtimeo = 9223372036854775807,
      sk_filter = 0x0,
      sk_protinfo = 0x0,
      sk_timer = {
        entry = {
          next = 0x0,
          prev = 0x0
        },
        expires = 0,
        function = 0x0,
        data = 0,
        base = 0xffff881811e94000,
        start_site = 0x0,
        start_comm = "000000000000000000000000000000",
        start_pid = -1
      },
      sk_stamp = {
        tv64 = 0
      },
      sk_socket = 0xffff88180357c400,
      sk_user_data = 0x0,
      sk_sndmsg_page = 0x0,
      sk_send_head = 0x0,
      sk_sndmsg_off = 0,
      sk_write_pending = 0,
      sk_security = 0x0,
      sk_mark = 0,
      sk_classid = 0,
      sk_state_change = 0xffffffff81429380 <sock_def_wakeup>,
      sk_data_ready = 0xffffffff814298b0 <sock_def_readable>,
      sk_write_space = 0xffffffff81429810 <sock_def_write_space>,
      sk_error_report = 0xffffffff81429790 <sock_def_error_report>,
      sk_backlog_rcv = 0xffffffff81495fa0 <__udp_queue_rcv_skb>,
      sk_destruct = 0xffffffff814a13c0 <inet_sock_destruct>
    }

    In the same way, from the assembly we can recover the kiocb and sock_iocb arguments:
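
    The annotated disassembly below is the body of sock_sendmsg, which on this kernel (paraphrased from net/socket.c) builds an on-stack kiocb and sock_iocb and links them together:

    int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
    {
            struct kiocb iocb;
            struct sock_iocb siocb;
            int ret;

            init_sync_kiocb(&iocb, NULL);   /* ki_flags = 0, ki_users = 1, ... */
            iocb.private = &siocb;          /* the lea -0xb0(%rbp),%rax below */
            ret = __sock_sendmsg(&iocb, sock, msg, size);
            if (-EIOCBQUEUED == ret)
                    ret = wait_on_sync_kiocb(&iocb);
            return ret;
    }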

    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 581
    0xffffffff814260ff <sock_sendmsg+79>:   movq   $0x0,-0x190(%rbp)------------- this is kiocb.ki_flags = 0 (part of the inlined init_sync_kiocb)
    0xffffffff8142610a <sock_sendmsg+90>:   movl   $0x1,-0x188(%rbp)
    0xffffffff81426114 <sock_sendmsg+100>:  movl   $0xffffffff,-0x184(%rbp)
    0xffffffff8142611e <sock_sendmsg+110>:  movq   $0x0,-0x180(%rbp)
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/include/linux/list.h: 30
    0xffffffff81426129 <sock_sendmsg+121>:  mov    %rax,-0x130(%rbp)
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/include/linux/list.h: 31
    0xffffffff81426130 <sock_sendmsg+128>:  mov    %rax,-0x128(%rbp)
    /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 582
    0xffffffff81426137 <sock_sendmsg+135>:  lea    -0xb0(%rbp),%rax  ------------ this takes &siocb for iocb.private = &siocb;
    crash> struct -x kiocb 0xffff881723cefc98
    struct kiocb {
      ki_run_list = {
        next = 0xffff881700000064,
        prev = 0xffff881723cefca0
      },
      ki_flags = 0x0,
      ki_users = 0x1,
      ki_key = 0xffffffff,
      ki_filp = 0x0,
      ki_ctx = 0x0,
      ki_cancel = 0x0,
      ki_retry = 0x0,
      ki_dtor = 0x0,
      ki_obj = {
        user = 0xffff881723ce8080,
        tsk = 0xffff881723ce8080
      },
      ki_user_data = 0x0,
      ki_wait = {
        flags = 0x0,
        private = 0xffff881723ce8080,
        func = 0xffffffff810925e0 <autoremove_wake_function>,
        task_list = {
          next = 0xffff881723cefd08,
          prev = 0xffff881723cefd08
        }
      },
      ki_pos = 0xffff880b74cfdea8,
      private = 0xffff881723cefd88,------------------ this is &siocb
      ki_opcode = 0xffff,
      ki_nbytes = 0xffff881723cefd68,
      ki_buf = 0xffffffea <Address 0xffffffea out of bounds>,
      ki_left = 0x0,
      ki_inline_vec = {
        iov_base = 0xffff881723cefdb8,
        iov_len = 0xffffffff810a4b2e
      },
      ki_iovec = 0x6038,
      ki_nr_segs = 0xffffffff81ecbdc0,
      ki_cur_seg = 0x1d4e000,
      ki_list = {
        next = 0xffff880c06021180,
        prev = 0x3b0
      },
      ki_eventfd = 0x2654dacc
    }
    crash> px 0xffff881723cefe38-0xb0
    $16 = 0xffff881723cefd88
    crash> struct sock_iocb 0xffff881723cefd88
    struct sock_iocb {
      list = {
        next = 0xffffffff8100bdee <reschedule_interrupt+14>,
        prev = 0xffff881723cefe18
      },
      flags = 659,
      size = 84,
      sock = 0xffff88180357c400,
      sk = 0x1c,
      scm = 0x0,
      msg = 0xffff881723cefe58,
      async_msg = {
        msg_name = 0x1,
        msg_namelen = -1684701504,
        msg_iov = 0xffff881723cefeb0,
        msg_iovlen = 18446612203643961072,
        msg_control = 0xffffffffffffff02,
        msg_controllen = 18446744071580416266,
        msg_flags = 28
      },
      kiocb = 0xffff881723cefeb8
    }

    Now that we have the sock pointer, reading inet_sock.opt from it gives:

    crash> struct inet_sock.opt 0xffff8811f8e77100
      opt = 0xffff881656d78fc0

    This is odd. Going back to the ipc.opt from earlier: in udp_sendmsg, ipc.opt is assigned like this:

        689         if (!ipc.opt)    // initially NULL, so the condition holds and inet->opt is assigned
        690                 ipc.opt = inet->opt;
    ipc.opt is ffff880c641d7f40, but normally it should equal inet->opt (0xffff881656d78fc0). The only possibility is that inet->opt was changed by some other path after it had been assigned to ipc.opt.

    An experienced colleague suspected that inet->opt was changed by a concurrent setsockopt after it was assigned to ipc.opt but before ipc.opt was used, because the setsockopt path has the following call chain:

    sys_setsockopt --> sock_common_setsockopt --> udp_setsockopt --> ip_setsockopt --> do_ip_setsockopt
     
    And in the execution flow of do_ip_setsockopt there is:
            opt = xchg(&inet->opt, opt);
            kfree(opt);
    The old inet->opt pointer is returned into opt and then kfree()d right away. If ipc.opt is still holding this already-freed inet->opt, and the slab object has meanwhile been handed out to someone else, the two pointers end up inconsistent and ipc.opt points at someone else's data, which is exactly what we see here.
    For the fix, see the upstream patch:
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f6d8bd051c391c1c0458a30b2a7abcd939329259
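
    The upstream fix wraps the options in a struct ip_options_rcu protected by RCU: senders take a private copy under rcu_read_lock() instead of caching the raw pointer, and the setsockopt path publishes the new options with rcu_assign_pointer() and frees the old ones only after a grace period. A rough paraphrase of the udp_sendmsg side of the patch (opt_copy is an on-stack struct ip_options_data introduced by the patch; this is a sketch, not the literal diff):

        if (!ipc.opt) {
                struct ip_options_rcu *inet_opt;

                rcu_read_lock();
                inet_opt = rcu_dereference(inet->inet_opt);
                if (inet_opt) {
                        /* copy the options onto the sender's stack so a
                         * concurrent setsockopt() can no longer free them
                         * out from under us */
                        memcpy(&opt_copy, inet_opt,
                               sizeof(*inet_opt) + inet_opt->opt.optlen);
                        ipc.opt = &opt_copy.opt;
                }
                rcu_read_unlock();
        }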
     
    My understanding is limited; if you spot any mistakes, please let me know.