zoukankan      html  css  js  c++  java
  • Shell编程之文本处理

    cut 截取指定列

    可以按照某个字符进行分割,然后取出其中的指定列:

    [root@iz8vbbqbnh4ug2q9so5jflz logs]# cat  localhost_access_log.2017-12-02.txt
    140.205.201.30 - - [02/Dec/2017:00:15:24 +0800] "GET / HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:17:51 +0800] "GET /rs-status HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:06 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "GET /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:07 +0800] "POST /phpmyadmin/ HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:19:09 +0800] "GET /ganglia/index.php HTTP/1.1" 404 -
    164.132.91.1 - - [02/Dec/2017:00:22:21 +0800] "GET / HTTP/1.1" 404 -
    114.215.45.101 - - [02/Dec/2017:00:23:43 +0800] "GET / HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:32:41 +0800] "GET /index.php HTTP/1.1" 404 -
    140.205.201.30 - - [02/Dec/2017:00:39:08 +0800] "GET /jobs/ HTTP/1.1" 404 -
    [root@iz8vbbqbnh4ug2q9so5jflz logs]# cat  localhost_access_log.2017-12-02.txt |cut -d ' ' -f 6
    "GET
    "GET
    "GET
    "POST
    "GET
    "POST
    "GET
    "POST
    "GET
    "POST
    "GET
    "POST
    "GET
    "POST
    "GET
    "POST
    "GET
    "POST
    "GET
    "POST
    "GET
    "POST
    "GET
    "GET
    "GET
    "GET
    "GET

    可以指定更多的列:

    [root@iz8vbbqbnh4ug2q9so5jflz logs]# cat  localhost_access_log.2017-12-02.txt |cut -d ' ' -f 2,3,4
    - - [02/Dec/2017:00:15:24
    - - [02/Dec/2017:00:17:51
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:06
    - - [02/Dec/2017:00:19:07
    - - [02/Dec/2017:00:19:07
    - - [02/Dec/2017:00:19:07
    - - [02/Dec/2017:00:19:07
    - - [02/Dec/2017:00:19:07
    - - [02/Dec/2017:00:19:09
    - - [02/Dec/2017:00:22:21
    - - [02/Dec/2017:00:23:43
    - - [02/Dec/2017:00:32:41
    - - [02/Dec/2017:00:39:08
    [root@iz8vbbqbnh4ug2q9so5jflz logs]# cat  localhost_access_log.2017-12-02.txt |cut -d ' ' -f 2,3,6-
    - - "GET / HTTP/1.1" 404 -
    - - "GET /rs-status HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /phpmyadmin/ HTTP/1.1" 404 -
    - - "POST /phpmyadmin/ HTTP/1.1" 404 -
    - - "GET /ganglia/index.php HTTP/1.1" 404 -
    - - "GET / HTTP/1.1" 404 -
    - - "GET / HTTP/1.1" 404 -
    - - "GET /index.php HTTP/1.1" 404 -
    - - "GET /jobs/ HTTP/1.1" 404 -

     sort 对列进行排序

    例如,对tomcat访问日志,对请求响应返回大小进行排序:

    cat localhost_access_log.2017-12-01.txt |sort -t ' ' -k 10

    -t : 指定分隔符

    -k : 指定排序的列

    114.241.108.197 - - [01/Dec/2017:09:03:45 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977
    114.241.108.197 - - [01/Dec/2017:11:45:30 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977
    114.241.108.197 - - [01/Dec/2017:14:41:04 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977
    223.72.82.98 - - [01/Dec/2017:15:26:10 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977
    59.108.217.106 - - [01/Dec/2017:09:35:17 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977
    59.108.217.106 - - [01/Dec/2017:13:08:46 +0800] "GET /js/plugin/jquery-file-upload/js/vendor/tmpl.min.js HTTP/1.1" 200 977
    114.241.108.197 - - [01/Dec/2017:09:03:32 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775
    114.241.108.197 - - [01/Dec/2017:11:28:29 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775
    114.241.108.197 - - [01/Dec/2017:14:40:51 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775
    223.72.82.98 - - [01/Dec/2017:15:26:03 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775
    59.108.217.106 - - [01/Dec/2017:09:35:01 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775
    59.108.217.106 - - [01/Dec/2017:09:35:10 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775
    59.108.217.106 - - [01/Dec/2017:13:08:52 +0800] "GET /img/logo-pale.png HTTP/1.1" 200 9775
    114.241.108.197 - - [01/Dec/2017:12:00:15 +0800] "GET /interview/detail.do?manageKey=15ba76c6fbeeccd2f8df875379ac88e9&targetPanel=dialog HTTP/1.1" 200 9952
    59.108.217.106 - - [01/Dec/2017:16:44:53 +0800] "GET /interview/detail.do?manageKey=15ba76c6fbeeccd2f8df875379ac88e9&targetPanel=dialog HTTP/1.1" 200 9952
    59.108.217.106 - - [01/Dec/2017:16:44:57 +0800] "GET /interview/detail.do?manageKey=15ba76c6fbeeccd2f8df875379ac88e9&targetPanel=dialog HTTP/1.1" 200 9952

    排序是有方向的,默认是升序排序,如果要降序排列,可以在列号后面增加一个r:

    cat localhost_access_log.2017-12-01.txt |sort -t ' ' -k 10r

    最后要注意的是,这里的排序默认是按字符串的字典顺序排列的,如果要按其数值排序,则需要增加一个n:

     cat localhost_access_log.2017-12-01.txt |sort -t ' ' -k 10n
    114.241.108.197 - - [01/Dec/2017:09:03:28 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554
    114.241.108.197 - - [01/Dec/2017:11:28:29 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554
    114.241.108.197 - - [01/Dec/2017:14:40:49 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554
    223.72.82.98 - - [01/Dec/2017:15:25:59 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554
    59.108.217.106 - - [01/Dec/2017:09:34:56 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554
    59.108.217.106 - - [01/Dec/2017:09:35:06 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554
    59.108.217.106 - - [01/Dec/2017:13:08:43 +0800] "GET /css/smartadmin-production.css HTTP/1.1" 200 394554
    112.65.193.14 - - [01/Dec/2017:11:28:44 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844
    114.241.108.197 - - [01/Dec/2017:09:03:30 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844
    114.241.108.197 - - [01/Dec/2017:11:28:33 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844
    114.241.108.197 - - [01/Dec/2017:14:40:49 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844
    223.72.82.98 - - [01/Dec/2017:15:26:01 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844
    59.108.217.106 - - [01/Dec/2017:09:34:56 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844
    59.108.217.106 - - [01/Dec/2017:09:35:06 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844
    59.108.217.106 - - [01/Dec/2017:13:08:43 +0800] "GET /js/jqueryui/1.10.3/jquery-ui.min.js HTTP/1.1" 200 435844

     由此可见,此网站最大的静态资源是这个jquery-ui.min.js文件。

    uniq去重

     cat localhost_access_log.2017-12-01.txt |cut -d ' ' -f 1,10 |sort -t ' ' -k 2n,1|uniq
    223.72.82.98 61692
    59.108.217.106 61692
    114.241.108.197 95786
    223.72.82.98 95786
    59.108.217.106 95786
    114.241.108.197 116060
    223.72.82.98 116060
    59.108.217.106 116060
    112.65.193.14 284394
    114.241.108.197 284394
    223.72.82.98 284394
    59.108.217.106 284394
    114.241.108.197 394554
    223.72.82.98 394554
    59.108.217.106 394554
    112.65.193.14 435844
    114.241.108.197 435844
    223.72.82.98 435844
    59.108.217.106 435844

    wc统计

    [root@iZ25klm6k7uZ logs]# wc -l localhost_access_log.2017-12-01.txt  统计行数
    1967 localhost_access_log.2017-12-01.txt
    [root@iZ25klm6k7uZ logs]# wc -w localhost_access_log.2017-12-01.txt  统计词数
    19670 localhost_access_log.2017-12-01.txt
    [root@iZ25klm6k7uZ logs]# wc -m localhost_access_log.2017-12-01.txt  共计字符数
    219011 localhost_access_log.2017-12-01.txt
    [root@iZ25klm6k7uZ logs]# 

    sed正则查找

    用sed来查找500的日志信息:

    [root@iZ25klm6k7uZ logs]# sed -n '/500/p' localhost_access_log.2017-12-01.txt
    119.127.17.97 - - [01/Dec/2017:14:23:18 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:23:24 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:24:12 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:31:11 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:49:51 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:49:57 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:55:45 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:58:03 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    59.108.217.106 - - [01/Dec/2017:15:00:22 +0800] "POST /interview/add.do HTTP/1.1" 500 19582

    注意:-n 选项和 p 命令配合,表示只打印匹配的行。另外,/500/ 会匹配行内任意位置出现的500(例如响应大小为15000的行),并不只限于状态码这一列。

    awk正则匹配

    用awk来查找500日志信息:

    awk '($9 ~ /500/)' localhost_access_log.2017-12-01.txt 

    输出和上面的sed一样。

    awk有默认的分隔符,比如空格、制表符等。如果要指定分隔符可以用-F。

    awk的强大之处在于它支持编程,格式如下:

    awk pattern { action } 例如上面的查找500日志可以完整表达如下:

    [root@iZ25klm6k7uZ logs]# awk -F ' ' '($9 ~ /500/){print }' localhost_access_log.2017-12-01.txt 
    119.127.17.97 - - [01/Dec/2017:14:23:18 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:23:24 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:24:12 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:31:11 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:49:51 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:49:57 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:55:45 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    119.127.17.97 - - [01/Dec/2017:14:58:03 +0800] "POST /interview/add.do HTTP/1.1" 500 19582
    59.108.217.106 - - [01/Dec/2017:15:00:22 +0800] "POST /interview/add.do HTTP/1.1" 500 19582

    同时查找500和404的日志:

    awk -F ' ' '($9 ~ /500/ || $9 ~ /404/){print $1,$6,$7,$9}' localhost_access_log.2017-12-01.txt

    或者用一个正则同时匹配多个状态码(下面的例子还额外匹配了400):

    awk -F ' ' '($9 ~ /500|404|400/){print $1,"-",$4,"-",$6,"-",$9}' localhost_access_log.2017-12-01.txt
  • 相关阅读:
    [SHOI2015]自动刷题机
    【教程】AI画放射图
    AI画圆角矩形
    极限运动:街头极限单车,不只是牛逼!
    DPK750针式打印机驱动,750u.dll下载
    文艺青年必看的Ⅹ部心理电影
    C语言程序设计-第2章 算法-程序的灵魂
    C语言程序设计-第1章 程序设计和C语言
    20151127笔记
    20151125小概念
  • 原文地址:https://www.cnblogs.com/at0x7c00/p/7945275.html
Copyright © 2011-2022 走看看