zoukankan      html  css  js  c++  java
  • Swiss-Prot蛋白数据库拆分成不同的子库

    Swiss-Prot蛋白数据库拆分成不同的子库

    首先从数据库下载文件

    # split_swissprot.pl reads taxonomic_divisions/uniprot_sprot_*.dat.gz, so
    # download the per-division files into that directory. The URL is quoted
    # so the shell passes the * through to wget's own FTP globbing.
    wget -P taxonomic_divisions 'ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_*.dat.gz'
    wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
    # The script and the grep pipeline both read the uncompressed
    # ./uniprot_sprot.fasta, so unpack the archive after downloading it.
    gunzip uniprot_sprot.fasta.gz
    

    使用写好的脚本进行操作

    # Split the database, then build a two-column table from the FASTA
    # headers: sequence identifier <TAB> rest of the description line.
    perl split_swissprot.pl
    # Header lines are selected with an anchored pattern, and the tab is
    # written as \t (not a literal tab character) so the command survives
    # copy-and-paste.
    grep '^>' uniprot_sprot.fasta | sed 's/^>//' | perl -lane 'print join("\t", $F[0], join(" ", @F[1..$#F]))' > uniprot_sprot.id.annot.xls
    

    split_swissprot代码如下

    #!/usr/bin/perl -w
    #
    # split_swissprot.pl - split uniprot_sprot.fasta into per-division FASTA
    # files.
    #
    # Inputs (relative to the current directory):
    #   taxonomic_divisions/uniprot_sprot_<division>.dat.gz
    #       flat files; the first line of each "//"-terminated entry is
    #       expected to be "ID <entry name> ...".
    #   ./uniprot_sprot.fasta
    #       uncompressed FASTA whose sequence ids look like
    #       sp|<accession>|<entry name>.
    #
    # Outputs:
    #   swissprot_id.xls  "<entry name>\t<division>" for every entry of the
    #                     division files, plus "<entry name>\tunkown" for FASTA
    #                     records whose entry name is in no division file.
    #   ./<Division>.fa   the FASTA records of each division; most divisions
    #                     are also copied into Animal.fa or Other.fa (see
    #                     %targets_of below).
    #   formatdb.sh       one formatdb command line per *.fa / *.fasta file
    #                     found in the current directory.
    #
    # The misspelled names "Vrtebrates.fa", "Unkown.fa" and "unkown" are kept
    # exactly as the script has always written them, so that anything consuming
    # these outputs keeps working.
    use strict;
    use warnings;

    # Division name (taken from the .dat.gz file name) => files that receive
    # the FASTA records of that division.
    my %targets_of = (
        archaea       => [qw(Archaea.fa Other.fa)],
        bacteria      => [qw(Bacteria.fa Other.fa)],
        fungi         => [qw(Fungi.fa Other.fa)],
        human         => [qw(Human.fa Animal.fa)],
        invertebrates => [qw(Invertebrates.fa Animal.fa)],
        mammals       => [qw(Mammals.fa Animal.fa)],
        plants        => [qw(Plants.fa)],
        rodents       => [qw(Rodents.fa Animal.fa)],
        vertebrates   => [qw(Vrtebrates.fa Animal.fa)],
        viruses       => [qw(Viruses.fa Other.fa)],
    );
    my $unknown_file = 'Unkown.fa';

    # ---- Pass 1: map every entry name to its taxonomic division -------------

    my @dat_files = glob('taxonomic_divisions/uniprot_sprot_*.dat.gz');
    die "No taxonomic_divisions/uniprot_sprot_*.dat.gz files found\n"
        unless @dat_files;

    open my $id_out, '>', 'swissprot_id.xls'
        or die "Cannot write swissprot_id.xls: $!";

    my %division_of;
    for my $dat_file (@dat_files) {
        next unless $dat_file =~ m{uniprot_sprot_(.+)\.dat\.gz\z};
        my $division = $1;

        # List-form pipe open: no shell is involved, and "or" (unlike "||")
        # really applies to the result of open().
        open my $dat_in, '-|', 'gzip', '-dc', $dat_file
            or die "Cannot run gzip -dc $dat_file: $!";

        # Read one "//"-terminated entry at a time, for this file only.
        local $/ = "//\n";
        while (my $entry = <$dat_in>) {
            chomp $entry;
            next unless $entry =~ /\S/;

            my ($id_line) = split /\n/, $entry, 2;
            my ($tag, $entry_name) = split ' ', $id_line;
            unless (defined $entry_name && $tag eq 'ID') {
                # A malformed entry is reported and NOT recorded.
                warn "ID error! ($dat_file, entry $.)\n";
                next;
            }

            print {$id_out} "$entry_name\t$division\n";
            $division_of{$entry_name} = $division;
        }

        close $dat_in
            or die $!
                ? "Error reading from gzip -dc $dat_file: $!"
                : "gzip -dc $dat_file exited with status $?";
    }

    # ---- Pass 2: distribute the FASTA records --------------------------------

    # Every output file is created up front, even if it ends up empty.
    my %handle_for;
    for my $file ((map { @{$_} } values %targets_of), $unknown_file) {
        next if exists $handle_for{$file};
        open my $fh, '>', "./$file" or die "Cannot write ./$file: $!";
        $handle_for{$file} = $fh;
    }

    open my $fasta_in, '<', './uniprot_sprot.fasta'
        or die "Cannot read ./uniprot_sprot.fasta: $!";
    {
        # A record starts at a '>' in column 0, so split on "\n>" rather than
        # on a bare '>': a '>' inside a description line must not cut a record.
        local $/ = "\n>";
        my $is_first = 1;
        while (my $record = <$fasta_in>) {
            chomp $record;
            if ($is_first) {
                $record =~ s/\A>//;    # only the first record keeps its '>'
                $is_first = 0;
            }
            $record =~ s/\n*\z/\n/;    # exactly one trailing newline

            my ($header) = split /\n/, $record, 2;
            my ($seq_id) = split ' ', $header;

            # Records whose id is not sp|<accession>|<name>_<species> are
            # skipped.
            next unless defined $seq_id
                && $seq_id =~ /\Asp\|[^|]+\|([^|\s]+_[^|\s]+)\z/;
            my $entry_name = $1;

            my $division = $division_of{$entry_name};
            my @files;
            if (!defined $division) {
                # Entry name seen in no division file: log it and park the
                # record in the "unknown" file.
                print {$id_out} "$entry_name\tunkown\n";
                @files = ($unknown_file);
            }
            else {
                # A division without a table entry also goes to the
                # "unknown" file, but is not logged a second time.
                @files = @{ $targets_of{$division} || [$unknown_file] };
            }

            print { $handle_for{$_} } ">$record" for @files;
        }
    }
    close $fasta_in;
    close $id_out or die "Error writing swissprot_id.xls: $!";

    # Close (and thereby flush) the outputs before they are listed below.
    for my $file (sort keys %handle_for) {
        close $handle_for{$file} or die "Error writing ./$file: $!";
    }

    # ---- formatdb.sh: one command per FASTA file in this directory -----------

    my @fasta_files = glob('*.fa *.fasta');
    open my $format_out, '>', 'formatdb.sh'
        or die "Cannot write formatdb.sh: $!";
    for my $fasta_file (sort @fasta_files) {
        print {$format_out}
            "/media/sdb/bio/blast/bin/formatdb -p T -i $fasta_file\n";
    }
    close $format_out or die "Error writing formatdb.sh: $!";
  • 相关阅读:
    How to build Linux system from kernel to UI layer
    Writing USB driver for Android
    Xposed Framework for Android 8.x Oreo is released (in beta)
    Linux Smartphone Operating Systems You Can Install Today
    Librem 5 Leads New Wave of Open Source Mobile Linux Contenders
    GUADEC: porting GNOME to Android
    Librem 5 – A Security and Privacy Focused Phone
    GNOME and KDE Join Librem 5 Linux Smartphone Party
    Purism计划推出安全开源的Linux Librem 5智能手机
    国产系统之殇:你知道的这些系统都是国外的
  • 原文地址:https://www.cnblogs.com/raisok/p/15194168.html
Copyright © 2011-2022 走看看