zoukankan      html  css  js  c++  java
  • 物种丰度计算

    由于Qiime出了点问题,ITS项目先缓几天,这两天先忙着做meta的内容。

    物种丰度计算准备工作:

    1  使用SOAPAligner对过滤好的数据进行比对,得到相应的.soap文件,里面记录匹配到的reads的情况;

    2  还需要将所有用到的reference做一个TAX,tax文件记录reference的物种信息,每一行分别是一个物种的gi号、界、门、纲、目、科、属、种、亚种的名称,分别用制表符隔开;

    3  对于每个亚种genome,需要计算其长度,做成SIZE文件,每一行分别是一个亚种的名称和genome长度,用制表符隔开;

    计算过程:

    以亚种为基础,一个亚种的丰度Ab(G) = Ab(U) + Ab(M),其中

    G:物种

    U:unique数目,一条reads只比对上单一物种称为unique reads,一个物种的所有unique总和为U

    M:multiple数目,一条reads比对上多个物种成为multiple reads,一个物种的所有multiple总和为M

    Ab(U) = U/L(g); L(g)为相应物种的genome长度;

    Ni 表示 multiple reads 比对上的所有物种;

      1 #!/usr/bin/perl
      2 use strict;
      3 
      4 my $usage = "usage:get_profiling.pl <.soap file> <outprefix> <TAX file> <SIZE file> depth list
    ";
      5    $usage .= "depth 1..8 stand for Kindom..SubSpecies
    ";
      6 
      7 die $usage unless @ARGV>=5;
      8 
      9 my $soapfile = shift @ARGV;
     10 my $outprefix = shift @ARGV;
     11 my $taxfile = shift @ARGV;
     12 my $sizefile = shift @ARGV;
     13 my @depth = @ARGV;
     14 
     15 
     16 #########################################################################################################################
     17 #                                                            #
     18 #    %tax{gi}[Kindom][Phylum][Class][Order][Family][Genus][Species][SubSpecies]                    #
     19 #        1    2    3    4    5    6    7    8                        #
     20 #    %size{strname} = length of genome                                        #
     21 #    %soap{reads id}[0] = unique or multiple;                                    #
     22 #    %soap{reads id}[1..n] stand for matched strname;                                #
     23 #                                                            #
     24 #                    set tables                                    #    
     25 #                                                            #
     26 #########################################################################################################################
     27 open TAX,$taxfile or die $!;
     28 my %tax;
     29 while(<TAX>){
     30     chomp;
     31     my @a = split /s+/;
     32     for(my $i=1;$i<=8;$i++){
     33         $tax{$a[0]}[$i]=$a[$i];
     34     }
     35 }
     36 close TAX;
     37 open SIZE,$sizefile or die $!;
     38 my %size;
     39 while(<SIZE>){
     40     chomp;
     41     my @a = split /s+/;
     42     if (exists $tax{$a[0]})
     43     {
     44         $size{$tax{$a[0]}[8]} = int($a[1]);
     45     }
     46 }
     47 close SIZE;
     48 open SOAP,$soapfile or die $!;
     49 my %soap;
     50 while(<SOAP>){
     51     chomp;
     52     my @a = split /s+/;
     53     my $id = $a[0];
     54     $id = substr($id,0,$id.length()-2);
     55     my $strname = $tax{$a[7]}[8];
     56 
     57     if ( exists $soap{$id} ){
     58         $soap{$id}[0] = "multiple" unless ($soap{$id}[1] eq $strname);
     59     }else{
     60         $soap{$id}[0] = "unique";
     61     }
     62     push(@{$soap{$id}},$strname);
     63      
     64     my $reads2 = <SOAP>;    
     65 }
     66 close SOAP;
     67 #########################################################################################################################
     68 #                                                            #
     69 #                table setted successful                                    #
     70 #                                                            #
     71 #########################################################################################################################
     72 my %uniquenum;
     73 my %multiplestr;
     74 my %str_reads;
     75 my %reads_str;
     76 my %abu;
     77 my %abm;
     78 my %sum;
     79 my %ab_str;  #the result hash;
     80 foreach my $id (sort keys %soap){
     81     my $type = shift @{$soap{$id}};
     82     if ( $type eq "unique" ){
     83         $uniquenum{$soap{$id}[0]}++;
     84     }elsif ($type eq "multiple"){
     85         foreach my $strname($soap{$id}){    
     86             $multiplestr{$strname}++;
     87             $reads_str{$id}{$strname}++;
     88             $str_reads{$strname}{$id}++;
     89         }
     90     }
     91 }
     92 foreach my $strname(sort keys %uniquenum){
     93     $abu{$strname} = $uniquenum{$strname} / $size{$strname} if(exists $size{$strname});
     94 }
     95 foreach my $id(sort keys %soap){
     96     foreach my $strname (sort keys %{$reads_str{$id}}){
     97         $sum{$id} += $abu{$strname} if (exists $abu{$strname});
     98     }
     99 }
    100 foreach my $strname(sort keys %multiplestr){
    101     foreach my $id(sort keys %{$str_reads{$strname}}){
    102         $abm{$strname} += $abu{$strname}/$sum{$id} if($sum{$id});
    103     }
    104 }
    105 foreach my $strname(sort keys %abu){
    106     if(exists $abm{$strname})
    107     {
    108         $ab_str{$strname} = $abu{$strname} + $abm{$strname}; 
    109     }else{
    110         $ab_str{$strname} = $abu{$strname};
    111     }
    112 }
    113 undef %tax;
    114 undef %size;
    115 undef %soap;
    116 undef %uniquenum;
    117 undef %multiplestr;
    118 undef %str_reads;
    119 undef %reads_str;
    120 undef %abu;
    121 undef %abm;
    122 undef %sum;
    123 #########################################################################################################################
    124 #                                                            #
    125 #                abundance of subSpecies calculated successful                        #
    126 #                                                            #
    127 #########################################################################################################################                                
    128 
    129 open OUT,">test" or die $!;
    130 foreach my $keys(sort keys %ab_str){
    131     print OUT "$keys	$ab_str{$keys}
    ";
    132 }
    133 close OUT;
    134 foreach my $d (@depth){
    135     die "Please input correct depth !
    " unless ($d>=1 && $d<=8);
    136     &Abundance($d);
    137 }
    138 sub Abundance{
    139     my $depth = shift @_;
    140     my @text=("num","Kindom","Phylum","Class","Order","Family","Genus","Species","SubSpecies");
    141     open TAX,$taxfile or die $!;
    142     my %strtable;
    143     my %ab;
    144     while(<TAX>){
    145         chomp;
    146         my @a = split /s+/;
    147         $strtable{$a[$depth]}{$a[8]}++;
    148     }
    149     foreach my $name (sort keys %strtable){
    150         foreach my $strname (sort keys %{$strtable{$name}}){
    151             if(exists $ab_str{$strname}){
    152                 $ab{$name} += $ab_str{$strname};
    153             }
    154         }
    155     }
    156     open AB,">$outprefix"."_"."$text[$depth].abundance" or die $!;
    157     print AB "$text[$depth]	abundance
    ";
    158     foreach my $name(sort keys %ab){
    159         print AB "$name	$ab{$name}
    "
    160     }
    161     close AB;
    162 }
  • 相关阅读:
    【C/C++】动态内存分配和链表
    【C/C++】递归算法
    UnicodeMath编码教程
    UnicodeMath数学公式编码_翻译(Unicode Nearly Plain
    浅谈Java反射机制
    lvs--小白博客
    python部署lvs
    python部署galery集群
    python第九章:面向对象--小白博客
    python之yagmail模块--小白博客
  • 原文地址:https://www.cnblogs.com/lyon2014/p/4019132.html
Copyright © 2011-2022 走看看