args.txt
#!/bin/bash
#
# cmd.sh — small page crawler driven by a key=value config file (see args.txt).
#
# Usage: cmd.sh <config-file>
#
# Recognised config keys:
#   url        page to download (fetched with curl, converted gbk -> utf-8)
#   beginwith  literal marker: keep only text AFTER its first occurrence
#   endwith    literal marker: keep only text BEFORE its first occurrence
#   pagereg    Perl-compatible regex (grep -Po) selecting the links to keep
#   savepath   output file for the de-duplicated match list
#   prefix     optional string prepended to every output line
#   proxy      optional proxy handed to curl via -x

# trim STRING — print STRING with leading/trailing spaces removed.
trim() {
  printf '%s\n' "$1" | sed -e 's/\(^ *\)\(.*[^ ]\)\( *$\)/\2/'
}

# parse_config FILE — read key=value lines from FILE into the globals
# url / beginwith / endwith / pagereg / savepath / prefix / proxy.
parse_config() {
  local line key value
  # '|| [ -n "$line" ]' also processes a final line missing its newline;
  # IFS= and -r keep whitespace and backslashes in values intact.
  while IFS= read -r line || [ -n "$line" ]; do
    key=$(trim "${line%%=*}")
    value=$(trim "${line#*=}")
    case "$key" in
      url)       url=$value ;;
      beginwith) beginwith=$value ;;
      endwith)   endwith=$value ;;
      pagereg)   pagereg=$value ;;
      savepath)  savepath=$value ;;
      prefix)    prefix=$value ;;
      proxy)     proxy=$value ;;
    esac
  done < "$1"
}

main() {
  if [ -z "${1:-}" ] || [ ! -e "${1:-}" ]; then
    echo "Usage: cmd.sh input "
    return 1   # original 'exit' reported success (0) on a usage error
  fi

  # --- positional-parameter demos kept from the original script ---
  echo "$0"
  for num in "$@"; do
    echo "$num"
  done
  for i in $(seq -3 $#); do
    echo "$i"
  done
  for i in {0..5}; do
    echo "$i"
  done
  echo "$@"
  for ((i = 4; i < 7; i++)); do
    echo "$i"
  done
  echo "all:$$"

  parse_config "$1"

  echo "url:$url"
  echo "beginwith:$beginwith"
  echo "pagereg:$pagereg"
  echo "endwith:$endwith"
  echo "prefix:$prefix"
  echo "proxy:$proxy"
  echo "savepath:$savepath"

  # mktemp is race-free; the original built a predictable path from a
  # kernel uuid in the current directory.
  local tmpfile
  tmpfile=$(mktemp) || return 1
  echo "tmpfile:$tmpfile"

  local content
  if [ -z "$proxy" ]; then
    content=$(curl -s "$url" | iconv -f gbk -t utf-8)
  else
    content=$(curl -x "$proxy" -s "$url" | iconv -f gbk -t utf-8)
  fi
  echo "download:${#content} byte(s)"

  # Keep only the text between the first beginwith and the first endwith.
  # Quoting the markers makes them literal strings, not glob patterns.
  content=${content#*"$beginwith"}
  content=${content%%"$endwith"*}
  echo "after filter:${#content} byte(s)"

  # Extract matches; awk drops duplicates while keeping first-seen order
  # (the original 'uniq' only removed *adjacent* duplicates, and the
  # unquoted 'echo $content' word-split the page before grepping it).
  printf '%s\n' "$content" | grep -Po "$pagereg" | awk '!seen[$0]++' > "$tmpfile"

  if [ -n "$prefix" ]; then
    sed "s/^/$prefix/" "$tmpfile" > "$savepath"
  else
    cp "$tmpfile" "$savepath"
  fi
  rm -f "$tmpfile"

  # --- string-manipulation demos kept from the original script ---
  local str="0000012345456789000000"
  echo "$str"
  str=${str#*0}   # strip up to and including the first '0'
  echo "$str"

  str=" s = "
  str=$(trim "$str")
  echo "[$str]"
  trim "$str"
}

main "$@"
url = focus.news.163.com
beginwith = <ul class="focuslist-1" id="focusTab-1">
pagereg = (?<=href=\\")http://focus\\.news\\.163\\.com/[\\d]+.+?(?=\\")
endwith = <div class="con-4" area clearfix">
savepath = 163.txt