zoukankan      html  css  js  c++  java
  • 根据SNP的位置从基因组提取上下游序列

    代码如下:

    #!/usr/bin/perl -w
    
    use strict;
    
    die "perl $0 <vcf> <genome>" if(@ARGV == 0);
    
    #Author:yueyao@genomics.cn
    
    my $vcf=shift;
    my $genome=shift;
    
    my%hash;
    my $id;
    open GENOME,$genome or die $!;
    while(<GENOME>){
        chomp;
        if(/^>/){
            $id=$_;
            $id=~s/>//;
            $id=~s/ //g;
        }else{
            $hash{$id} .= $_;
        }
    }
    close GENOME;
    
    my@temp;
    my$pos;
    my$start;
    my$end;
    my$len;
    my$seq;
    my$fetchseq;
    my($refindelseq,$altindelseq,$upseq,$downseq,$downstart,$refindellen,$upend,$upstart);
    open VCF,$vcf or die $!;
    while(<VCF>){
        chomp;
        next if(/^Chr/);
        @temp=split/	/;
        if(exists $hash{$temp[0]}){
           $seq=$hash{$temp[0]};
           $pos=$temp[1];
           $refindelseq=$temp[3];
           $altindelseq=$temp[4];
           $refindellen=length($refindelseq);
           $upstart=$pos-1-100;
           $upend=$pos-1;
           $upseq=substr($seq,$upstart,100);
           $downstart=$pos+$refindellen-1;
           $downseq=substr($seq,$downstart,100);
           $end=$pos+100+$refindellen-1;
           print "$_	>$temp[0]_$upstart\_$end	$upseq[$refindelseq/$altindelseq]$downseq
    "
        }
    }
    close VCF;
    

      

  • 相关阅读:
    函数重载和函数指针在一起
    Uva
    Uva
    Uva
    Uva
    Uva
    CCPC-Wannafly-day5
    CCPC-Wannafly-day3
    CCPC-Wannafly-day2
    CCPC-Wannafly-Winter 2020.01.12总结
  • 原文地址:https://www.cnblogs.com/raisok/p/11457083.html
Copyright © 2011-2022 走看看