# Download the circBase putative spliced circRNA sequences (human hg19 and
# mouse mm9) and unpack them.
wget http://circbase.org/download/human_hg19_circRNAs_putative_spliced_sequence.fa.gz
gunzip human_hg19_circRNAs_putative_spliced_sequence.fa.gz
wget http://circbase.org/download/mouse_mm9_circRNAs_putative_spliced_sequence.fa.gz
gunzip mouse_mm9_circRNAs_putative_spliced_sequence.fa.gz
# Download the circBase circRNA tables for mouse (mm9) and human (hg19).
wget http://circbase.org/download/mmu_mm9_circRNA.txt
wget http://circbase.org/download/hsa_hg19_circRNA.txt
# Extract the known circRNA sequences from the putative circRNA sequences.
# Arguments as passed here: sequence FASTA, circRNA table, output FASTA
# (extract_circBase_seq.pl itself is not part of this file -- confirm the
# argument roles against that script).
perl extract_circBase_seq.pl mouse_mm9_circRNAs_putative_spliced_sequence.fa mmu_mm9_circRNA.txt mmu_mm9_circRNA.fa
# Download the coordinate conversion tool (UCSC liftOver).
wget http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/liftOver
# A file fetched with wget is not executable; it is later run as ./liftOver.
chmod +x liftOver
# Download and unpack the chain files (hg19 -> hg38, mm9 -> mm10) from the
# production UCSC download server rather than the "-test" host.
wget https://hgdownload.soe.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg38.over.chain.gz
gunzip hg19ToHg38.over.chain.gz
wget https://hgdownload.soe.ucsc.edu/goldenPath/mm9/liftOver/mm9ToMm10.over.chain.gz
gunzip mm9ToMm10.over.chain.gz
# Option 1: convert the coordinates with the web version of liftOver.
# Split the tab-delimited mm9 table into its position columns (1-3) and the
# remaining info columns (4-13) so the positions can be converted on their own.
cut -f1,2,3 mmu_mm9_circRNA.txt >mmu_mm9_circRNA_pos.txt
# The file is read directly by perl (no pager in the pipeline) and the tab
# separator is spelled out as \t instead of a literal whitespace character.
perl -F'\t' -lane 'print join("\t", @F[3..12])' mmu_mm9_circRNA.txt >mmu_mm9_circRNA.info.txt
# Manual step: upload mmu_mm9_circRNA_pos.txt to
# http://genome.ucsc.edu/cgi-bin/hgLiftOver and save the converted result as
# mmu_mm10_circRNA_pos.txt.
# Caveat: the web conversion is given positions only, so it does not know
# which strand each record is on, and some results will be wrong.
# paste joins the two files line by line, so the converted positions must
# still have the same rows in the same order as the info file.
paste mmu_mm10_circRNA_pos.txt mmu_mm9_circRNA.info.txt >mmu_mm10_circRNA.txt
# Option 2: convert the coordinates with the command-line liftOver.
# Build a 9-column BED file from the mm9 table, skipping its first (header)
# line: columns 1-3 as chrom/start/end, column 5 as the BED name, 0 as score,
# column 4 as strand, columns 2-3 again as thickStart/thickEnd, and a fixed
# itemRgb of 255,0,0. Tabs are used explicitly for input and output.
awk 'BEGIN { FS = OFS = "\t" } NR > 1 { print $1, $2, $3, $5, 0, $4, $2, $3, "255,0,0" }' mmu_mm9_circRNA.txt >mmu_mm9_circRNA.pre2mm10.id.txt
# Records that cannot be converted are written to the file "unmap".
./liftOver mmu_mm9_circRNA.pre2mm10.id.txt mm9ToMm10.over.chain mmu_mm10_circRNA.bed unmap
# Re-attach the info columns of the mm9 table to the converted coordinates.
# Output: the "#" header line(s) of the mm9 table unchanged, then one line
# per converted record: chrom, start, end, strand, name, mm9 columns 6-13.
perl -e '
    use strict;
    use warnings;

    my ($circ_file, $bed_file) = @ARGV;

    # Index the mm9 table by its 5th column (the value used as the BED name),
    # keeping columns 6-13 as one tab-joined string.
    my %info_for;
    open my $circ_fh, "<", $circ_file or die "open $circ_file: $!\n";
    while (my $line = <$circ_fh>) {
        chomp $line;
        if ($line =~ /^#/) {
            print "$line\n";    # pass header lines through unchanged
            next;
        }
        # Limit -1 keeps trailing empty fields so the column count is stable.
        my @col = split /\t/, $line, -1;
        next unless defined $col[4];
        $info_for{ $col[4] } = join "\t",
            map { defined $_ ? $_ : "" } @col[5 .. 12];
    }
    close $circ_fh;

    # Walk the converted BED records and print those with a known name.
    open my $bed_fh, "<", $bed_file or die "open $bed_file: $!\n";
    while (my $line = <$bed_fh>) {
        chomp $line;
        my @bed = split /\t/, $line;
        next unless defined $bed[3] && exists $info_for{ $bed[3] };
        print join("\t", @bed[0, 1, 2, 5, 3], $info_for{ $bed[3] }), "\n";
    }
    close $bed_fh;
' mmu_mm9_circRNA.txt mmu_mm10_circRNA.bed >mmu_mm10_circRNA2.txt