zoukankan      html  css  js  c++  java
  • gdb调试分析多线程死锁

    转载:

    http://blog.chinaunix.net/uid-30343738-id-5757210.html

    #include <stdio.h>
    #include <pthread.h>
    #include <stdlib.h>
    #include <unistd.h>
    
    /* Shared counters incremented by func1()/func2(); objects with static
     * storage duration start at zero without an explicit initializer. */
    static int sequence1;
    static int sequence2;

    /* The two mutexes taken by func1() and func2(); main() initialises them
     * with pthread_mutex_init() and destroys them before returning. */
    pthread_mutex_t lock1, lock2;
    
    /*
     * Increments sequence1 under lock1, then (still holding lock1) increments
     * sequence2 under lock2.
     *
     * The acquisition order here is lock1 -> lock2, the reverse of func2().
     * Running both concurrently is the deadlock the surrounding article
     * analyses, so the ordering is part of the demonstration.
     *
     * Returns the value of sequence1 read after both locks are released.
     */
    int func1()
    {
        /* Outer critical section: lock1. */
        pthread_mutex_lock(&lock1);
        sequence1 += 1;

        /* Keep lock1 held for a second before asking for lock2. */
        sleep(1);

        /* Inner critical section: lock2, nested inside lock1. */
        pthread_mutex_lock(&lock2);
        sequence2 += 1;
        pthread_mutex_unlock(&lock2);

        pthread_mutex_unlock(&lock1);

        return sequence1;
    }
    
    /*
     * Counterpart of func1() that takes the same two mutexes in the opposite
     * order: lock2 first, then lock1. Run concurrently with func1(), this
     * inverted ordering is the deadlock the surrounding article analyses, so
     * the acquisition order is left untouched.
     *
     * Returns the value of sequence1 sampled while lock1 is held.
     */
    int func2()
    {
        int snapshot;

        /* Outer critical section: lock2. */
        pthread_mutex_lock(&lock2);
        ++sequence2;

        /* Keep lock2 held for a second before asking for lock1. */
        sleep(1);

        /* Inner critical section: lock1, nested inside lock2. */
        pthread_mutex_lock(&lock1);

        /* NOTE(review): func1() increments sequence1 while holding lock1, yet
         * the counter bumped here is sequence2 again -- possibly a copy-paste
         * slip. Kept as-is; confirm the intended counter. */
        ++sequence2;

        /* func1() writes sequence1 with lock1 held, so read it before the
         * unlock; reading it afterwards would be an unsynchronised access. */
        snapshot = sequence1;

        pthread_mutex_unlock(&lock1);
        pthread_mutex_unlock(&lock2);

        return snapshot;
    }
    
    
    /*
     * Thread start routine: calls func1() over and over and terminates the
     * calling thread with pthread_exit(NULL) once func1() returns 100000.
     * The arg parameter is not used.
     */
    void* thread1(void *arg)
    {
        for (;;)
        {
            int result = func1();

            if (result != 100000)
            {
                continue;
            }

            pthread_exit(NULL);
        }
    }
    
    /*
     * Thread start routine: calls func2() over and over and terminates the
     * calling thread with pthread_exit(NULL) once func2() returns 100000.
     * The arg parameter is not used.
     */
    void* thread2(void *arg)
    {
        for (;;)
        {
            int result = func2();

            if (result != 100000)
            {
                continue;
            }

            pthread_exit(NULL);
        }
    }
    
    /*
     * Thread start routine that takes no locks: it sleeps one second per
     * iteration and calls pthread_exit(NULL) on the first iteration whose
     * (pre-increment) counter value exceeds 10000. The arg parameter is not
     * used.
     */
    void* thread3(void *arg)
    {
        int ticks;

        for (ticks = 0; ; ++ticks)
        {
            sleep(1);

            /* Same test as `count++ > 10000`: compare first, bump afterwards. */
            if (ticks > 10000)
            {
                pthread_exit(NULL);
            }
        }
    }
    
    /*
     * Thread start routine that takes no locks: it sleeps one second per
     * iteration and calls pthread_exit(NULL) on the first iteration whose
     * (pre-increment) counter value exceeds 10000. The arg parameter is not
     * used.
     */
    void* thread4(void *arg)
    {
        int ticks;

        for (ticks = 0; ; ++ticks)
        {
            sleep(1);

            /* Same test as `count++ > 10000`: compare first, bump afterwards. */
            if (ticks > 10000)
            {
                pthread_exit(NULL);
            }
        }
    }
    
    
    
    /*
     * Program entry point: initialises lock1 and lock2, starts thread1..thread4
     * in that order, sleeps five seconds, joins the four threads in creation
     * order, destroys both mutexes and returns 0.
     *
     * If any pthread_create() call fails the process ends at once via _exit(1).
     */
    int main()
    {
        /* Start routines, listed in the order in which they are launched. */
        void *(*const entry[4])(void *) = { thread1, thread2, thread3, thread4 };
        pthread_t tid[4];
        int i;

        pthread_mutex_init(&lock1, NULL);
        pthread_mutex_init(&lock2, NULL);

        for (i = 0; i < 4; ++i)
        {
            if (pthread_create(&tid[i], NULL, entry[i], NULL) != 0)
            {
                _exit(1);
            }
        }

        sleep(5);

        /* Each join blocks until the corresponding thread has terminated. */
        for (i = 0; i < 4; ++i)
        {
            pthread_join(tid[i], NULL);
        }

        pthread_mutex_destroy(&lock1);
        pthread_mutex_destroy(&lock2);

        return 0;
    }

    编译执行程序。

    gcc -o main main17.c -lpthread -g

     

    使用 pstack 和 gdb 工具对死锁程序进行分析

    1、使用pstack 

    查找测试程序的进程号

    root 5383 1 0 06:31 ? 00:00:43 gedit /root/Project/xa/main17.c
    root 7197 7179 0 10:04 pts/1 00:00:00 ./main
    root 7218 7206 0 10:04 pts/2 00:00:00 grep --color=auto main

    对死锁进程第一次执行 pstack(用法:pstack <进程号>)的输出结果

     Thread 5 (Thread 0x41e37940 (LWP 6722)): 
     #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
     #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
     #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
     #3  0x0000000000400a9b in func1() () 
     #4  0x0000000000400ad7 in thread1(void*) () 
     #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 4 (Thread 0x42838940 (LWP 6723)): 
     #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
     #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
     #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
     #3  0x0000000000400a17 in func2() () 
     #4  0x0000000000400a53 in thread2(void*) () 
     #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 3 (Thread 0x43239940 (LWP 6724)): 
     #0  0x0000003d19c9a541 in nanosleep () from /lib64/libc.so.6 
     #1  0x0000003d19c9a364 in sleep () from /lib64/libc.so.6 
     #2  0x00000000004009bc in thread3(void*) () 
     #3  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #4  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 2 (Thread 0x43c3a940 (LWP 6725)): 
     #0  0x0000003d19c9a541 in nanosleep () from /lib64/libc.so.6 
     #1  0x0000003d19c9a364 in sleep () from /lib64/libc.so.6 
     #2  0x0000000000400976 in thread4(void*) () 
     #3  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #4  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 1 (Thread 0x2b984ecabd90 (LWP 6721)): 
     #0  0x0000003d1a807b35 in pthread_join () from /lib64/libpthread.so.0 
     #1  0x0000000000400900 in main ()  

     对死锁进程第二次执行 pstack(用法:pstack <进程号>)的输出结果

     Thread 5 (Thread 0x40bd6940 (LWP 6722)): 
     #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
     #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
     #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
     #3  0x0000000000400a87 in func1() () 
     #4  0x0000000000400ac3 in thread1(void*) () 
     #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 4 (Thread 0x415d7940 (LWP 6723)): 
     #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
     #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
     #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
     #3  0x0000000000400a03 in func2() () 
     #4  0x0000000000400a3f in thread2(void*) () 
     #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 3 (Thread 0x41fd8940 (LWP 6724)): 
     #0  0x0000003d19c7aec2 in memset () from /lib64/libc.so.6 
     #1  0x00000000004009be in thread3(void*) () 
     #2  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #3  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 2 (Thread 0x429d9940 (LWP 6725)): 
     #0  0x0000003d19c7ae0d in memset () from /lib64/libc.so.6 
     #1  0x0000000000400982 in thread4(void*) () 
     #2  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #3  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 1 (Thread 0x2af906fd9d90 (LWP 6721)): 
     #0  0x0000003d1a807b35 in pthread_join () from /lib64/libpthread.so.0 
     #1  0x0000000000400900 in main () 

     

    连续多次查看这个进程的函数调用关系堆栈进行分析:当进程挂死(hang)时,多次使用 pstack 查看进程的函数调用堆栈,死锁线程将一直处于等锁的状态,对比多次的函数调用堆栈输出结果,

    确定哪两个线程(或者几个线程)一直没有变化且一直处于等锁的状态(可能存在两个线程 一直没有变化)。

    输出分析:

    根据上面的输出对比可以发现,线程 2 和线程 3 由第一次 pstack 输出的处在 sleep 函数变化为第二次 pstack 输出的处在 memset 函数(线程 1 即主线程,两次都停在 pthread_join)。但是线程 4 和线程 5 一直处在等锁状态(pthread_mutex_lock),

    在连续两次的 pstack 信息输出中没有变化,所以我们可以推测线程 4 和线程 5 发生了死锁

     

    2、使用gdb进行进一步的分析

    查找测试程序的进程号

    root 5383 1 0 06:31 ? 00:00:43 gedit /root/Project/xa/main17.c
    root 7197 7179 0 10:04 pts/1 00:00:00 ./main
    root 7218 7206 0 10:04 pts/2 00:00:00 grep --color=auto main

    使用gdb 的attach功能

    gdb attach 7197

    查看当前进程的线程信息

    (gdb) info thread
    Id Target Id Frame
    5 Thread 0xb7539b40 (LWP 7198) "main" 0xb7717424 in __kernel_vsyscall ()
    4 Thread 0xb6d38b40 (LWP 7199) "main" 0xb7717424 in __kernel_vsyscall ()
    3 Thread 0xb6537b40 (LWP 7200) "main" 0xb7717424 in __kernel_vsyscall ()
    2 Thread 0xb5d36b40 (LWP 7201) "main" 0xb7717424 in __kernel_vsyscall ()
    * 1 Thread 0xb753a6c0 (LWP 7197) "main" 0xb7717424 in __kernel_vsyscall ()

     

     切换到线程 5 的输出

    (gdb) thread  5
    [Switching to thread 5 (Thread 0xb7539b40 (LWP 7198))]
    #0 0xb7717424 in __kernel_vsyscall ()
    (gdb) where
    #0 0xb7717424 in __kernel_vsyscall ()
    #1 0xb76f25a2 in __lll_lock_wait () from /lib/i386-linux-gnu/libpthread.so.0
    #2 0xb76edead in _L_lock_686 () from /lib/i386-linux-gnu/libpthread.so.0
    #3 0xb76edcf3 in pthread_mutex_lock ()
    from /lib/i386-linux-gnu/libpthread.so.0
    #4 0x0804864b in func1 () at main17.c:17
    #5 0x080486ef in thread1 (arg=0x0) at main17.c:44
    #6 0xb76ebd4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
    #7 0xb762adde in clone () from /lib/i386-linux-gnu/libc.so.6
    (gdb) f  4
    #4 0x0804864b in func1 () at main17.c:17
    warning: Source file is more recent than executable.
    17 pthread_mutex_lock(&lock2);     ////线程 5 正试图获得锁 lock2

     

    切换到线程4的输出

    (gdb) thread 4
    [Switching to thread 4 (Thread 0xb6d38b40 (LWP 7199))]
    #0 0xb7717424 in __kernel_vsyscall ()
    (gdb) where
    #0 0xb7717424 in __kernel_vsyscall ()
    #1 0xb76f25a2 in __lll_lock_wait () from /lib/i386-linux-gnu/libpthread.so.0
    #2 0xb76edead in _L_lock_686 () from /lib/i386-linux-gnu/libpthread.so.0
    #3 0xb76edcf3 in pthread_mutex_lock ()
    from /lib/i386-linux-gnu/libpthread.so.0
    #4 0x080486ae in func2 () at main17.c:30
    #5 0x0804871c in thread2 (arg=0x0) at main17.c:58
    #6 0xb76ebd4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
    #7 0xb762adde in clone () from /lib/i386-linux-gnu/libc.so.6
    (gdb) f 4
    #4 0x080486ae in func2 () at main17.c:30
    30 pthread_mutex_lock(&lock1);      //线程 4 正试图获得锁 lock1

    打印锁的信息

    (gdb) p lock1
    $1 = {__data = {__lock = 2, __count = 0, __owner = 7198, __kind = 0,
    __nusers = 1, {__spins = 0, __list = {__next = 0x0}}},
    __size = "0200000000000000363400000000000001000000000000", __align = 2}
    (gdb) p lock2
    $2 = {__data = {__lock = 2, __count = 0, __owner = 7199, __kind = 0,
    __nusers = 1, {__spins = 0, __list = {__next = 0x0}}},
    __size = "0200000000000000373400000000000001000000000000", __align = 2}

     

    从上面可以发现,线程 4 正试图获得锁 lock1,但是锁 lock1已经被 LWP 为 7198的线程得到(__owner = 7198),

    线程 5 正试图获得锁 lock2,但是锁 lock2 已经被 LWP 为 7199 的线程得到(__owner = 7199)。从 gdb 的 info thread 输出可以发现,LWP 7198 与线程 5 是对应的,LWP 7199 与线程 4 是对应的。

    所以我们可以得出,线程 4 和线程 5 发生了交叉持锁的死锁现象。查看线程的源代码发现,线程 4 和线程 5 同时使用 lock1 和 lock2,且申请顺序相反(func1 先 lock1 后 lock2,func2 先 lock2 后 lock1),申请顺序不合理。

     

  • 相关阅读:
    【SCOI 2011】 糖果
    【POJ 3159】 Candies
    【POJ 1716】 Integer Intervals
    【POJ 2983】 Is the information reliable?
    【POJ 1364】 King
    【POJ 1201】 Intervals
    【POJ 1804】 Brainman
    6月10日省中提高组题解
    【POJ 3352】 Road Construction
    【POJ 1144】 Network
  • 原文地址:https://www.cnblogs.com/zhangxuan/p/6385329.html
Copyright © 2011-2022 走看看