1.交互状态
#!/bin/bash
#count genes in region,need three para
#author lee
if [ $# -ne 3 ]
then
echo "Usage chr start end"
else
num=$(awk -v chr=$1 -v start=$2 -v end=$3 '{if($3=="gene" && $1==chr && $4>=start && $4<=end)sum++}END{print sum}' /public/home/caisl/lee/genome/msu.gff3)
echo -e " 33[31mThere are $num genes of $1:$2-$3 33[0m"
echo -e " 33[32mDetail info is in the file of ${1}_${2}_${3} 33[0m"
awk -v chr=$1 -v start=$2 -v end=$3 '{if($3=="gene" && $1==chr && $4>=start && $4<=end)print $0}' /public/home/caisl/lee/genome/msu.gff3 >${1}_${2}_${3}.txt
fi
2.批量提取
chr | start | end |
---|---|---|
1 | 100 | 100 |
2 | 200 | 2000 |
#!/bin/bash
#author lee
if [ -f detail_of_$1 ]
then
rm detail_of_$1
fi
if [ -f number_of_$1 ]
then
rm number_of_$1
fi
while read line
do
chr=$(echo $line |awk '{print $1}');
start=$(echo $line |awk '{print $2}');
end=$(echo $line |awk '{print $3}');
awk -v chr=$chr -v start=$start -v end=$end '{if($3=="gene" && $1==chr && $4>=start && $4<=end)print $0}' /public/home/caisl/lee/genome/msu.gff3 >>detail_of_$1;
num=$(awk -v chr=$chr -v start=$start -v end=$end '{if($3=="gene" && $1==chr && $4>=start && $4<=end)sum++}END{print sum}' /public/home/caisl/lee/genome/msu.gff3);
echo "There are $num genes of $chr:$start-$end"
echo "There are $num genes of $chr:$start-$end" >>number_of_$1;
done <$1
后面继续尝试下面这种方法
#!/bin/bash
cat $1 |while read gene chr from to
do
#echo $chr $from $to
if echo $2 |grep -q '.*.vcf.gz$';then
vcftools --gzvcf $2 --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out $gene.$chr.$from-$to
elif echo $2 |grep -q '.*.vcf$';then
vcftools --vcf $2 --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out $gene.$chr.$from-$to
fi
done