awk - 走看看

zoukankan html css js c++ java

awk

Section 1: 常用运维awk命令

统计tomcat每秒的带宽(字节)，最大的排在最后面

#cat localhost_access_log.txt | awk '{ bytes[$5] += $NF; }; END{for(time in bytes) print   bytes[time] " " time}' | sort -n

统计某一秒的带宽

#grep "18:07:34" localhost_access_log.txt |awk '{ bytes += $NF; } END{ print bytes }'

统计指定ip.txt中ip在local_access.txt中出现的次数

#cat ip.txt //内容如下

12.3.4.5

12.3.4.6

12.3.4.7

12.3.4.8

#cat local_access.txt

19:23:35 /a.html   12.3.4.5

19:23:35 /b.html   12.3.4.5

19:23:35 /c.html   12.3.4.6

19:23:35 /d.html   12.3.4.7

19:23:35 /a.html   12.3.4.9

19:23:35 /b.html   12.3.4.9

19:23:35 /c.html   12.3.4.9

#awk -F " " '{if (NR==FNR) {arr1[$1]=1} else{arr2[$3]++;}} END{for(ip in arr1){print ip,arr2[ip]}}' ip.txt local_access.txt

12.3.4.5 2

12.3.4.6 1

12.3.4.7 1

12.3.4.8

Section 2: $0,$1,$2,$NF,$(NF-1)的使用

$0整个当前行， $1当前行的第一个域   $NF为最后一个域   $(NF-1)为倒数第二个

#echo "a b c d e" |awk '{print $1; print $2; print $(NF-1);print $NF;print $0}'

a                 //对应第1个域

b                 //对应第2个域

d                 //对应$(NF-1),对应倒数第二个域

e                 //对应$NF,最后一个域

a b c d e         //对应$0

Section 3: print, printf用法

#awk 'BEGIN{a=1;b="213";print "output "a","b;}'

output 1,213

#awk 'BEGIN{a=1;b="213";print "output",a,","b;}'

output 1 ,213

printf的使用

#awk 'BEGIN{a=1;b="213";printf("output %d,%s\n",a,b)}'

output 1,213

Section 4: 选择分隔符

awk默认是按照空格来分割, NF表示域的个数

#echo "a:b c,d" |awk '{print $1; print $2; print NF}'

a:b

c,d

2

根据":",空格，","来进行分割

#echo "a:b c,d" |awk -F " |,|:" '{print $1; print $2; print NF}'

a

b

4

Section 5: BEGIN,END用法

abc.txt内容如下：

first lady

second boy

third child

#cat abc.txt |awk 'BEGIN {print "begin process"} {print "process 1 "$1} {print "process 2 "$2} END { print " the end"}'

换行后如下:

#cat abc.txt |awk -F " " '

          BEGIN {print "begin process"}     //在开头的时候执行一次

               {print "process 1 "$1}       //每一行执行一次

               {print "process 2 "$2}       //每一行执行一次

          END { print " the end"}'         //最后的时候执行一次

输出如下

begin process

process 1 first         // {print "process 1 "$1} 执行了一次

process 2 lady          // {print "process 2 "$2} 执行了一次

process 1 second

process 2 boy

process 1 third

process 2 child

the end

没有BEGIN，只有END的情况

#cat abc.txt |awk '{print "begin process"} {print "process 1 "$1} {print "process 2 "$2} END { print " the end"}'

格式化语句如下

#cat abc.txt |awk -F " "

'{print "begin process"}          //因为没有BEGIN 所以这个每一行都会执行

   {print "process 1 "$1}           //每一行都会执行

   {print "process 2 "$2}           //每一行都会执行

    END { print " the end"}'        //最后执行一次



输出如下:

begin process

process 1 first

process 2 lady

begin process

process 1 second

process 2 boy

begin process

process 1 third

process 2 child

the end

Section 6: 数组使用

awk中数据结构使用, 数组也可以理解为map

#awk 'BEGIN{array1["a"]=1;array1[2]="213";print array1["a"],array1[2]}'

1 213

year.txt中内容如下

2016:09 1    //表示2016年9月，有一个访问

2016:06 1

2016:06 1

2016:01 1

2015:01 1

2014:01 1

2015:01 1

2016:02 1

下面语句是把每个月的访问量相加,排序后输出

#awk '{bytes[$1]+=$2} END { for(time in bytes) print bytes[time],time}' year.txt |sort -n

展开如下

#awk

    '{bytes[$1]+=$2}             //bytes为数组，下标是时间，value是访问量

      END {

          for(time in bytes) print bytes[time], time

      }'

      year.txt |sort -n

输出的内容如下：bytes是一个数组，这里的下标是字符串(如"2016:09")；awk数组的下标可以是数字，也可以是字符串

1 2014:01

1 2016:01

1 2016:02

1 2016:09

2 2015:01

2 2016:06

#awk 'BEGIN{tB["a"]="a1";tB["b"]="b1";if(tB["c"]!="1"){print "no found";};for(k in tB){print k,tB[k];}}'

展开是如下

#awk 'BEGIN{

          tB["a"]="a1";

          tB["b"]="b1";

          if(tB["c"]!="1"){           //条件成立，会进入这个分支；但是引用tB["c"]会在tB中自动创建下标"c"(值为空)

              print "no found";

          };

          for(k in tB){

              print k,tB[k];

          }

       }'

输出如下：

no found

a a1

b b1

c                   //"c"没有赋值，但if判断中引用tB["c"]时它被自动创建了，所以循环时会出现在数组里，这是引用数组元素的副作用

要修改这个点，需要使用如下

#awk 'BEGIN {

         tB["a"]="a1";

         tB["b"]="b1";

         if ("c" in tB) {                      //用in来进行判断，不会创建元素，就没有副作用

             print "c is in tB";

         }

         for(k in tB){

             print k,tB[k];

         }

      }'

awk的多维数组

#awk 'BEGIN{ for(i=1;i<=3;i++) {for(j=1;j<=3;j++) {tarr[i,j]=i*j;print i,"*",j,"=",tarr[i,j]}}}'

展开后如下:

#awk 'BEGIN{

          for(i=1;i<=3;i++) {

              for(j=1;j<=3;j++) {

                  tarr[i,j]=i*j;

                  print i,"*",j,"=",tarr[i,j]

              }

          }

      }'

输出如下：

1 * 1 = 1

1 * 2 = 2

1 * 3 = 3

2 * 1 = 2

2 * 2 = 4

2 * 3 = 6

3 * 1 = 3

3 * 2 = 6

3 * 3 = 9

awk多维数组的in判断

#awk 'BEGIN{ tarr[1,3]=5;if ((1,3) in tarr) print "1,3 in"; if ((4,4) in tarr) print "4,4 in"}'

#awk 'BEGIN{

          tarr[1,3]=5;

          if ((1,3) in tarr)          //直接使用(1,3)来判断in语句

              print "1,3 in";

          if ((4,4) in tarr)

              print "4,4 in"}'

Section 7: for语句使用

#awk 'BEGIN{array1["a"]=1;array1["c"]=3;array1["b"]=2;for(index1 in array1) print index1,array1[index1]}'

展开如下:

#awk 'BEGIN{

          array1["a"]=1;

          array1["c"]=3;

          array1["b"]=2;

          for(index1 in array1)

              print index1,array1[index1]

      }'

输出如下(注意：for...in 的遍历顺序是不保证的，实际输出的顺序可能不同)：

a 1

b 2

c 3

for也可以使用k=1;k<=3;k++的形式

#awk 'BEGIN{array1[1]="a";array1[3]="c";array1[2]="b";len=length(array1);for(k=1;k<=len;k++) print k,array1[k]}'

展开如下:

#awk 'BEGIN{

          array1[1]="a";

          array1[3]="c";

          array1[2]="b";

          len=length(array1);        //得到数组的长度

          for(k=1;k<=len;k++)

              print k,array1[k]

      }'

输出如下

1 a

2 b

3 c

Section 8: 内置函数使用

int函数，把字符串转为整数

#awk 'BEGIN {print int("12.9")}'        返回一个整数

12

index函数

#awk 'BEGIN {print index("12.9343",".")}'     //index方法返回"."在"12.9343"中的位置，没有找到则返回0

3

length函数    得到数组的长度,字符串长度

#awk 'BEGIN{array1["a"]=1;array1["b"]=2;print length(array1)}'

输出如下:

2

#awk 'BEGIN{a="123";print length(a)}'   得到字符串长度

3

match函数，   检测info中是否含有"te"，如果有，则返回"te"第一次出现的位置(下面的例子输出7)，如果没有则返回0

#awk 'BEGIN {info="is is test"; print match(info,"te");}'

rand函数   生成0到1之间的随机数；但是如果不先调用srand()设置种子，每次运行得到的序列都是相同的

#awk 'BEGIN {print rand() " " rand()}'    rand()会生成一个0-1的数字

0.840188 0.394383         //每次运行第一个，第二个都是这个数字

split函数按照某个分隔符，对字符串进行分割

split按照" "对"it is a test"进行切割，切割后的内容放在thearray中；返回值是split之后thearray的元素个数。

#awk 'BEGIN {print split("it is a test",thearray," "); print thearray[1]}'

4                    //split后返回数组的长度

it                   //打印第一个元素

sub函数   替换

#awk 'BEGIN {info="this a test"; sub("a","b",info); print info }'   把info中"a"用"b"替代

this b test

substr函数   得到子字符串

substr(s, m, n)     s是要截取的字符串,m是开始点，从1开始，   n是要截取的长度

#awk 'BEGIN {print substr("12.9343",2,4)}'      //substr

2.93

toupper函数   字符串转为大写

#awk 'BEGIN {info="this a test"; print toupper(info);}'

THIS A TEST

tolower函数   字符串转为小写

#awk 'BEGIN {info="thIS A TEST"; print tolower(info);}'

this a test

查看全文

相关阅读:
设计模式学习笔记之一：策略模式
 向上转型和向下转型
 html readonly和disabled的区别
 如何自定义JSR-303标准的validator
vue 组件属性props,特性驼峰命名,连接线使用
 laydate中设置动态改变max与min值的方法
 浅谈JS中 reduce() 的用法
 jq 实时监听input输入框的变化
 npm install --save 和 npm install -d的区别
 vue中html、js、vue文件之间的简单引用与关系

原文地址：https://www.cnblogs.com/steel-chen/p/10636786.html