# grep the name of gene and find the real name in goat_reference_gene_list
for record in `grep "Re" ../out_No_Pinneped`; do
grep "${${record##*/}%.*}" ../goat_gene.list >> gene_record
done
# pick up the gene symbol
awk '{print $2}' gene_record > ./gene_symbol
# find the overlap
for i in `les gene_symbol `; do
grep "$i" results_Yuan >> overlap
done
# find specific range of seq
# Method 1: seqkit
seqkit faidx ./chr10.fa chr10:47818633-47822767| les
# Method 2: samtools
samtools faidx ./trichechus_manatus.fa
samtools faidx trichechus_manatus.fa NW_004446089.1:0-30000 > NW_004446089.1.fa
#===================================================================================
#get the order of "coevolution site"/"total length" in No-pinneped coevolution list
1. get the total length list
seqkit head -n 1 ../Re_alignment/NP_001272477.fa | sed --regexp-extended 's/>S+//' | wc -c
for gene in `grep "Re" ./out_No_Pinneped `; do
echo $gene >> out_No_Pinneped_statistics; seqkit head -n 1 $gene| sed --regexp-extended 's/>S_//' | w
done
2. calculate and sort in excel