[root@python ~]# cat testcount.txt
a 1.1.1.1
b 2.2.2.2
c 3.3.3.3
a 1.1.1.1
d 4.4.4.4
e 5.5.5.5
f 6.6.6.6
1.去重(按第一列去重,只保留每个值首次出现的行):
awk '!a[$1]++ {print}' testcount.txt
a 1.1.1.1
b 2.2.2.2
c 3.3.3.3
d 4.4.4.4
e 5.5.5.5
f 6.6.6.6
2.统计第一列各值出现的次数
awk '{a[$1]++} END{for (i in a) print a[i],i}' testcount.txt
2 a
1 b
1 c
1 d
1 e
1 f
3.统计日志中访问大于100次的IP(示例数据较少,这里以大于1次演示;实际使用时把条件中的 1 改为 100)
[root@python ~]# awk '{a[$1]++}END{for (i in a) {if (a[i] >1) print a[i],i}}' testcount.txt
2 a
过滤邮箱地址
[root@python ~]# egrep '[a-z0-9]{3,}@[0-9a-z]{4,}\.[0-9a-z]{2,}' oldboy.txt
linuxdxw@onecloud.cn
[root@python ~]# egrep '[a-z0-9]{3,}@[0-9a-z]{4,}\.[0-9a-z]{2,}' oldboy.txt
过滤ip地址
[root@python ~]# egrep '[0-9]{1,3}(\.[0-9]{1,3}){3}' oldboy.txt
inet addr:10.0.0.8 Bcast:10.0.0.255 Mask:255.255.255.0
inet addr:10.0.0.8 Bcast:10.0.0.255 Mask:255.255.255.0
过滤mac地址
[root@python ~]# egrep '[0-9A-Fa-f]{2}(:[0-9A-Fa-f]{2}){5}' oldboy.txt
link/ether 02:00:47:76:00:6c brd ff:ff:ff:ff:ff:ff