zoukankan      html  css  js  c++  java
  • 东师理想云平台故障处理一例

    pstree -p `ps -e | grep python | awk '{print $1}'`

    kill -9 `ps -e|grep python  | awk '{print $1}'`

    vi /etc/crontab

    # 解决思路:通过 MALLOC_ARENA_MAX 限制 glibc malloc 的 arena 数量,降低 Java 进程的虚拟内存(VIRT)占用
    export MALLOC_ARENA_MAX=1

    # Java程序在Linux上运行虚拟内存耗用很大
    https://blog.csdn.net/u010686469/article/details/77319599

    #Java 进程占用 VIRT 虚拟内存超高的问题研究
    https://www.cnblogs.com/seasonsluo/p/java_virt.html

    pmap -x 32520 | grep anon

    #  重启两个占用虚拟内存大的进程
    ps -ef | grep tomcat
    kill -9 PID
    service rabbitmq-server restart

    119486 java8
    33419  java7
    119486 

    # 查看系统日志中内核 OOM killer(Out of memory)杀进程的记录
    cat /var/log/messages | grep 'Out of memory' -C 5

    #Out of memory: Kill process 解决
    https://blog.51cto.com/qiangsh/2066747

    [root@localhost ~]# cat /var/log/messages | grep 'Out of memory' -C 5
    Aug 20 18:15:59 localhost kernel: [71037]    89 71037    20282      117   5       0             0 cleanup
    Aug 20 18:15:59 localhost kernel: [71039]    89 71039    20256      162   6       0             0 bounce
    Aug 20 18:15:59 localhost kernel: [72029]     0 72029    35007      155   0       0             0 crond
    Aug 20 18:15:59 localhost kernel: [72329]     0 72329    19143       82   1       0             0 sendmail
    Aug 20 18:15:59 localhost kernel: [72338]     0 72338     2275       16  17       0             0 sh
    Aug 20 18:15:59 localhost kernel: Out of memory: Kill process 56436 (find) score 267 or sacrifice child
    Aug 20 18:15:59 localhost kernel: Killed process 56436, UID 0, (find) total-vm:19784052kB, anon-rss:10437824kB, file-rss:8kB
    Aug 20 18:20:15 localhost NTP: 20 Aug 18:20:15 ntpdate[72927]: adjust time server 185.198.26.172 offset 0.017783 sec
    Aug 20 18:30:15 localhost NTP: 20 Aug 18:30:14 ntpdate[74577]: adjust time server 54.183.204.201 offset -0.012002 sec
    Aug 20 18:32:29 localhost kernel: sh invoked oom-killer: gfp_mask=0x200da, order=0, oom_adj=0, oom_score_adj=0
    Aug 20 18:32:29 localhost kernel: sh cpuset=/ mems_allowed=0-1
    --
    Aug 20 18:32:29 localhost kernel: [74838]     0 74838    26489       37  18       0             0 awk
    Aug 20 18:32:29 localhost kernel: [74839]     0 74839    26308       28  21       0             0 sed
    Aug 20 18:32:29 localhost kernel: [74841]     0 74841     2275       14   0       0             0 sh
    Aug 20 18:32:29 localhost kernel: [74842]     0 74842     1541       29  17       0             0 sshpass
    Aug 20 18:32:29 localhost kernel: [74843]   497 74843     2275       13  19       0             0 sh
    Aug 20 18:32:29 localhost kernel: Out of memory: Kill process 51244 (find) score 408 or sacrifice child
    Aug 20 18:32:29 localhost kernel: Killed process 51244, UID 0, (find) total-vm:29107212kB, anon-rss:15434620kB, file-rss:4kB
    Aug 20 18:40:12 localhost NTP: 20 Aug 18:40:12 ntpdate[76243]: adjust time server 103.105.51.156 offset 0.012008 sec
    Aug 20 18:50:12 localhost NTP: 20 Aug 18:50:11 ntpdate[77522]: adjust time server 103.105.51.156 offset -0.002601 sec
    Aug 20 18:57:15 localhost kernel: java invoked oom-killer: gfp_mask=0x201da, order=0, oom_adj=0, oom_score_adj=0
    Aug 20 18:57:15 localhost kernel: java cpuset=/ mems_allowed=0-1
    --
    Aug 20 18:57:15 localhost kernel: [78302]    89 78302    20282      227  16       0             0 cleanup
    Aug 20 18:57:15 localhost kernel: [78342]     0 78342    34942       90   0       0             0 crond
    Aug 20 18:57:15 localhost kernel: [78348]     0 78348    26519       45   1       0             0 freemem.sh
    Aug 20 18:57:15 localhost kernel: [78376]     0 78376       75        9   1       0             0 sync
    Aug 20 18:57:15 localhost kernel: [78377]     0 78377      297       11   0       0             0 sh
    Aug 20 18:57:15 localhost kernel: Out of memory: Kill process 30747 (find) score 848 or sacrifice child
    Aug 20 18:57:15 localhost kernel: Killed process 30747, UID 0, (find) total-vm:58109856kB, anon-rss:30773196kB, file-rss:4kB
    Aug 20 19:00:06 localhost NTP: 20 Aug 19:00:06 ntpdate[115182]: adjust time server 74.208.26.225 offset 0.030441 sec
    Aug 20 19:10:06 localhost NTP: 20 Aug 19:10:06 ntpdate[59888]: adjust time server 206.55.191.142 offset -0.023210 sec
    Aug 20 19:20:06 localhost NTP: 20 Aug 19:20:06 ntpdate[6326]: adjust time server 66.79.136.240 offset 0.017423 sec
    Aug 20 19:30:06 localhost NTP: 20 Aug 19:30:06 ntpdate[148943]: adjust time server 45.79.1.70 offset -0.016261 sec
    --
    Oct 16 11:59:42 localhost kernel: [107168]     0 107168    14347        2   0       0             0 sftp-server
    Oct 16 11:59:42 localhost kernel: [108234]    89 108234    20246      224   1       0             0 pickup
    Oct 16 11:59:42 localhost kernel: [108512]    89 108512    20282      228   0       0             0 cleanup
    Oct 16 11:59:42 localhost kernel: [108514]     0 108514    20331      291   2       0             0 local
    Oct 16 11:59:42 localhost kernel: [108516]    89 108516    20256      224   0       0             0 bounce
    Oct 16 11:59:42 localhost kernel: Out of memory: Kill process 100492 (find) score 912 or sacrifice child
    Oct 16 11:59:42 localhost kernel: Killed process 100492, UID 0, (find) total-vm:62385608kB, anon-rss:31424580kB, file-rss:4kB
    Oct 16 12:00:07 localhost NTP: 16 Oct 12:00:07 ntpdate[108604]: adjust time server 72.30.35.89 offset -0.003301 sec
    Oct 16 12:10:06 localhost NTP: 16 Oct 12:10:06 ntpdate[109099]: adjust time server 198.255.68.106 offset -0.005266 sec
    Oct 16 12:20:06 localhost NTP: 16 Oct 12:20:06 ntpdate[110347]: adjust time server 44.190.6.254 offset 0.019827 sec



    # 扩大JVM内存
    vi  /usr/local/tomcat7/bin/catalina.sh

    # 原配置
    #JAVA_OPTS="-Xms256m -Xmx512m -Xss1024K -XX:PermSize=128m -XX:MaxPermSize=256m"

    # 32G内存参考配置
    JAVA_OPTS="-server  -Xms10g -Xmx10g -XX:PermSize=1g -XX:MaxPermSize=2g -Xshare:off -Xmn1024m"

    cat /usr/local/tomcat7/logs/catalina.out | grep 'com.alibaba.druid.pool.GetConnectionTimeoutException' -C 5

    Caused by: com.alibaba.druid.pool.GetConnectionTimeoutException: wait millis 30000, active 50, runningSqlCount 3 : INSERT INTO T_TK_QUESTION_BASE (QUESTION_ID_CHAR,QUESTION_TITLE,QUESTION_TIPS,QUESTION_TYPE_ID,QUESTION_TYPE_NAME,QUESTION_DIFFICULT_ID,QUESTION_DIFFICULT_NAME,QUESTION_DIFFICULT_STAR,QUESTION_ANSWER,CREATE_TIME,CREATE_PERSON,B_USE,SOURCE_ID,TS,USE_COUNT,USE_RANGE,KG_ZG,FILE_ID,HEIGHT,PRODUCT_ID,CHECK_STATUS,CHECK_MESSAGE,PARENT_ID_CHAR,JSON_QUESTION,JSON_ANSWER,APP_TYPE,OPTIONS_COUNT,SUBJECT_ID,HAVE_CHILD,CONTENT_MD5,CONTENT_MD5_NEW_UNIQUE) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
            at com.alibaba.druid.pool.DruidDataSource.getConnectionInternal(DruidDataSource.java:1071)
            at com.alibaba.druid.pool.DruidDataSource.getConnectionDirect(DruidDataSource.java:898)

    从上面的异常可以看出:连接池的活动连接数已达上限(active 50,即 maxActive=50),获取连接等待 30000 毫秒后超时,说明数据库连接没有被及时释放。解决思路:

     //数据源配置
            druid.setInitialSize(20);
            druid.setMinIdle(10);
            druid.setMaxActive(50);
            druid.setMaxWait(600000);
            druid.setTimeBetweenEvictionRunsMillis(60000);
            druid.setMinEvictableIdleTimeMillis(300000);
            druid.setValidationQuery("SELECT 'x'");
            druid.setTestWhileIdle(true);
            druid.setTestOnBorrow(false);
            druid.setTestOnReturn(false);
            druid.setMaxPoolPreparedStatementPerConnectionSize(20);
    
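            //增加回收机制(注意:setRemoveAbandonedTimeoutMillis 的单位是毫秒,300 即 0.3 秒;若本意是 300 秒,应传入 300000,或改用以秒为单位的 setRemoveAbandonedTimeout(300))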
            druid.setRemoveAbandoned(true);
            druid.setRemoveAbandonedTimeoutMillis(300);
            druid.setLogAbandoned(false);
  • 相关阅读:
    jquery利用event.which方法获取键盘输入值的代码
    C#计算某个时间距离当前日期的天数
    C#.net 货币格式转换
    用批处理来重启IIS的应用程序池
    C# .net 如何根据访问者IP获取所在地区
    C# 根据IP查询地址归属地
    windows Server 2008 IIS7 503错误解决方案
    技术选型
    bootstrap table 自动换行问题
    .net 部署IIS 在服务器无法杀掉EXCEL进程
  • 原文地址:https://www.cnblogs.com/littlehb/p/11678817.html
Copyright © 2011-2022 走看看