./stat_ip.sh live-https.log-0510.gz 1000
#首先用shell脚本统计出某个日志中慢请求(查询时间超过N秒)对应的ip和调用次数(传两个参数:日志文件名、时间阈值N)
#!/bin/bash
#
# stat_ip.sh - summarise slow "previewlist" requests per client address.
#
# Usage: ./stat_ip.sh <gzipped-log-name> <threshold>
#   <gzipped-log-name>  gzip-compressed log file located under
#                       /home/bjliuzezhou (e.g. live-https.log-0510.gz)
#   <threshold>         numeric limit; a request counts as "long" when its
#                       time field (4th field from the end of the line) is
#                       greater than this value
#
# Results:
#   /home/bjliuzezhou/previewlist.log  "count address" lines for the long
#                                      requests, ordered by ascending count
#   stdout                             summary statistics

readonly WORK_DIR="/home/bjliuzezhou"

# Print a one-line usage message on stderr.
usage() {
  printf 'Usage: %s <gzipped-log-name> <threshold>\n' "${0##*/}" >&2
}

# Print $1 / $2. Prints 0 when the divisor is zero or empty, so an empty
# result set yields a clean "0" instead of an awk division/syntax error.
# Values are passed with -v rather than spliced into the awk program text.
safe_div() {
  awk -v num="${1:-0}" -v den="${2:-0}" 'BEGIN {
    if (den + 0 == 0) {
      print 0
    } else {
      print num / den
    }
  }'
}

# Read "time address" lines on stdin and print the address of every line
# whose time is numerically greater than the threshold given as $1.
# The "+ 0" forces a numeric comparison even for non-numeric time fields
# (which then count as 0) instead of silently comparing them as strings.
slow_ips() {
  awk -v th="$1" '($1 + 0) > (th + 0) { print $2 }'
}

# Remove the intermediate files, if they were created.
cleanup() {
  if [[ -n "${tmp_log_path:-}" ]]; then
    rm -f -- "${tmp_log_path}"
  fi
  if [[ -n "${tmp_log_path2:-}" ]]; then
    rm -f -- "${tmp_log_path2}"
  fi
}

# Set the fixed input and output paths (globals: ori_log_path, confirm_path).
define() {
  ori_log_path="${WORK_DIR}/${log}"
  confirm_path="${WORK_DIR}/previewlist.log"
}

# Extract the matching requests, compute the statistics and write the
# per-address counts to ${confirm_path}.
# Globals written: log_num, request_time, ave_request_time, log_num2, ratio.
# Returns non-zero when the input log is unreadable or a temp file cannot
# be created.
gather() {
  echo 'gather start-----------------------------------------------------------------'

  if [[ ! -r "${ori_log_path}" ]]; then
    printf 'cannot read log file: %s\n' "${ori_log_path}" >&2
    return 1
  fi

  # Unpredictable names, removed on every exit path (not only on success).
  trap cleanup EXIT
  tmp_log_path=$(mktemp "${WORK_DIR}/temp.XXXXXX") || return 1
  tmp_log_path2=$(mktemp "${WORK_DIR}/temp2.XXXXXX") || return 1

  # Keep "time address" for every NewsApp previewlist line. Lines with fewer
  # than four fields are skipped because $(NF-3) would be an invalid field.
  zcat "${ori_log_path}" \
    | grep 'NewsApp' \
    | grep 'previewlist' \
    | awk 'NF >= 4 { print $(NF-3), $1 }' > "${tmp_log_path}"

  log_num=$(awk 'END { print NR }' "${tmp_log_path}")
  # "+ 0" makes the total print as 0 (not an empty string) for empty input.
  request_time=$(awk '{ sum += $1 } END { print sum + 0 }' "${tmp_log_path}")
  ave_request_time=$(safe_div "${request_time}" "${log_num}")

  slow_ips "${threshold}" < "${tmp_log_path}" > "${tmp_log_path2}"
  log_num2=$(awk 'END { print NR }' "${tmp_log_path2}")
  ratio=$(safe_div "${log_num2}" "${log_num}")

  # Count occurrences per address, least frequent first.
  sort -n "${tmp_log_path2}" | uniq -c | sort -n > "${confirm_path}"
  # Disabled in the original as well:
  # awk -f ip_cn.awk ${confirm_path}

  cleanup
  echo 'gather end--------------------------------------------------------------------'
}

# Print the summary computed by gather.
output() {
  echo "request total time is ${request_time}"
  echo "request total number is ${log_num}"
  echo "aver request time is ${ave_request_time}"
  echo "long request total number is ${log_num2}"
  echo "long request time ratio is ${ratio}"
}

# Entry point: validate the two arguments, then define -> gather -> output.
# Returns 2 on a usage error and 1 when gathering fails.
main() {
  local -r numeric_re='^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)$'

  if (( $# < 2 )); then
    usage
    return 2
  fi
  if [[ ! "$2" =~ ${numeric_re} ]]; then
    printf 'threshold must be a number, got: %s\n' "$2" >&2
    return 2
  fi

  log=$1
  threshold=$2

  define
  gather || return 1
  output
}

main "$@"
nohup ./ip_operator.sh preview.log &
#然后将ip后8位置为0(原因参照全国ip段划分),统计出ip段以及对应的次数,并且解析出对应的运营商
#参数要传上一步生成的结果文件(示例中为preview.log;注意stat_ip.sh实际写出的文件名是previewlist.log)。抓取运营商耗时较长,请慎重!另外需要注意一个坑:awk脚本中不能直接执行shell脚本,需要通过system()调用。
#!/bin/bash
#
# ip_operator.sh - reduce the addresses in a file to /24-style segments and
# look every segment up through getip.sh.
#
# Usage: ./ip_operator.sh <file>
#
# Outputs (written to the current directory):
#   temp2.log  `uniq -c` output over the rewritten input lines, where each
#              line keeps its first three dot-separated fields and gets ".0"
#              appended
#   zzz.log    stdout of `sh getip.sh <field 3>` for every temp2.log line,
#              in the same order
#
# NOTE(review): field 3 of temp2.log is the segment only if every input line
# is "<count> <address>" (e.g. uniq -c output) - confirm against the caller.
# NOTE(review): counts already present in the input are not summed per
# segment; identical rewritten lines are merely collapsed - confirm intended.

# Print a one-line usage message on stderr.
usage() {
  printf 'Usage: %s <file>\n' "${0##*/}" >&2
}

# Rewrite every stdin line to its first three dot-separated fields followed
# by ".0", then sort the lines and prefix each distinct line with its count.
segment_counts() {
  cut -d . -f 1,2,3 | sed 's/$/.0/' | sort -n | uniq -c
}

# Entry point. Returns 2 on a usage error, 1 when the input is unreadable.
main() {
  local input segment

  if (( $# < 1 )); then
    usage
    return 2
  fi
  input=$1
  if [[ ! -r "${input}" ]]; then
    printf 'cannot read input file: %s\n' "${input}" >&2
    return 1
  fi

  # Piped directly, so no fixed-name temp.log is created or clobbered.
  segment_counts < "${input}" > temp2.log

  # One lookup per temp2.log line. The segment is handed over as a single
  # quoted argument instead of being spliced into an awk system() command
  # string, so shell metacharacters in the data are never executed.
  # stdin of the child is detached so it cannot consume the loop's input.
  while read -r _ _ segment _; do
    sh getip.sh "${segment}" < /dev/null
  done < temp2.log > zzz.log
}

main "$@"
#!/bin/bash
#
# getip.sh - query ip.cn for one address and print the response on stdout.
#
# Usage: sh getip.sh <address>
#
# Only portable sh constructs are used, because callers run this file as
# `sh getip.sh ...`, which bypasses the shebang line.

# Succeed when $1 is non-empty and consists only of characters that occur in
# IP addresses or host names, so nothing else can be injected into the URL.
is_lookup_target() {
  case "$1" in
    '' | *[!0-9A-Za-z.:-]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Entry point. Returns 2 when the argument is missing or unsafe, otherwise
# the exit status of curl.
main() {
  if ! is_lookup_target "${1-}"; then
    printf 'Usage: %s <address>\n' "${0##*/}" >&2
    return 2
  fi

  # The URL is quoted so that "?" is not treated as a glob character and the
  # argument is not word-split. stdin is detached from the caller (the
  # original achieved this with `echo |`).
  curl "ip.cn?ip=$1" < /dev/null
}

main "$@"
将最后解析出的temp2.log和zzz.log数据放在excel进行整合,分列,然后去重,开始快乐的统计之旅吧