zoukankan      html  css  js  c++  java
  • python cookbook3

    1、算GC含量

    def validate_base_sequence(base_sequence, RNAflag = False):
        """Return True if the sequence consists solely of valid base letters.

        The check is case-insensitive. By default the DNA letters A, C, G and
        T are accepted; when ``RNAflag`` is true, U is accepted in place of T.
        An empty sequence is considered valid.
        """
        allowed_bases = set('UCAG' if RNAflag else 'TCAG')
        return all(base in allowed_bases for base in base_sequence.upper())
        
    def gc_content(base_seq):
        """Return the fraction of bases in ``base_seq`` that are G or C.

        The sequence is validated as DNA with ``validate_base_sequence`` and
        is handled case-insensitively. An empty sequence yields 0.0.

        Raises:
            AssertionError: if the sequence contains invalid characters
                (note that assertions are skipped when Python runs with -O).
        """
        assert validate_base_sequence(base_seq), 'argument has invalid characters'
        seq = base_seq.upper()
        if not seq:
            # An empty sequence passes validation; avoid dividing by zero.
            return 0.0
        # Count on the upper-cased copy so lower-case input is not missed.
        return (seq.count('G') + seq.count('C')) / len(seq)
    
    def recognition_site(base_seq, recognition_seq):
        """Return the index of the first ``recognition_seq`` in ``base_seq``.

        Returns -1 when ``recognition_seq`` does not occur in ``base_seq``.
        """
        first_index = base_seq.find(recognition_seq)
        return first_index
    
    def test():
        """Run sanity checks on the sequence helpers and report success."""
        # Each case: (sequence, extra positional arguments, expected validity).
        validation_cases = [
            ('ACTG', (), True),
            ('', (), True),
            ('ACUG', (), False),
            ('ACUG', (True,), True),
            ('ACUG', (False,), False),
            ('ACTG', (False,), True),
        ]
        for sequence, extra_args, expected in validation_cases:
            assert bool(validate_base_sequence(sequence, *extra_args)) is expected

        # Each case: (sequence, expected GC fraction).
        gc_cases = [('ACTG', .5), ('CCGG', 1.0), ('ACTT', .25)]
        for sequence, expected in gc_cases:
            assert expected == gc_content(sequence)

        print('All tests passed.')
    # Run the self-checks as soon as the script is executed.
    test()
    

    2、元组

    >>> DNABases, RNABases = 'TCAG', 'UCAG'
    >>> DNABases
    'TCAG'
    >>> RNABases
    'UCAG'
    >>> bases = 'TCAG', 'UCAG' # a two-element tuple
    >>> bases
    ('TCAG', 'UCAG')
    

    3、切割

    def recognition_site(base_seq, recognition_seq):
        """Locate the first occurrence of ``recognition_seq`` in ``base_seq``.

        Returns the zero-based index of the match, or -1 if there is none.
        """
        position = base_seq.find(recognition_seq)
        return position
    
    def restriction_cut(base_seq, recognition_seq, offset = 0):
        """Split ``base_seq`` in two at the first ``recognition_seq`` site.

        The cut position is the index of the first occurrence of
        ``recognition_seq`` plus ``offset``.

        Returns:
            A ``(left, right)`` tuple of the pieces before and after the cut.
            When the recognition sequence does not occur, the sequence is
            left uncut: ``left`` is the whole sequence and ``right`` is empty.
        """
        site = recognition_site(base_seq, recognition_seq)
        if site < 0:
            # "Not found" is reported as -1; slicing with it would wrongly
            # chop the last base off the sequence, so cut at the very end.
            cut = len(base_seq)
        else:
            # A negative position would wrap around and slice from the end;
            # clamp it so a cut before the start leaves the left piece empty.
            cut = max(site + offset, 0)
        return base_seq[:cut], base_seq[cut:]
    
    # Example: split a sample sequence at its first TCCGGA site. With the
    # default offset of 0 the cut falls at the start of the site.
    aseq1 = 'AAAAATCCCGAGGCGGCTATATAGGGCTCCGGAGGCGTAATATAAAA'
    left, right = restriction_cut(aseq1, 'TCCGGA')
    

    4、读取fasta序列,并取最长的序列

    def read_FASTA(filename):
        """Read a FASTA file and return its entries.

        Returns:
            A list of ``(description, sequence)`` tuples, one per '>' record.
            ``description`` is the header line without the leading '>', and
            ``sequence`` is the rest of the record with newlines removed.
        """
        with open(filename) as fasta_file:
            contents = fasta_file.read()
        # Anything before the first '>' is not a record, so drop it.
        entries = contents.split('>')[1:]
        result = []
        for entry in entries:
            # The first line is the description; the remainder is sequence data.
            description, _, sequence_lines = entry.partition('\n')
            result.append((description, sequence_lines.replace('\n', '')))
        return result
    
    def longest_sequence(filename):
        """Return the longest sequence found in the FASTA file ``filename``.

        If several sequences share the greatest length, the one that appears
        first is returned. An empty string is returned when the file has no
        entries.
        """
        sequences = (seq for _, seq in read_FASTA(filename))
        return max(sequences, key=len, default='')
    
  • 相关阅读:
    微信小程序实现滚动到指定位置
    微信小程序,scroll-view组件的使用,跳转到指定的锚点/定位跳转
    小程序,报渲染层错误。图片无法渲染。
    input Input 输入判断/正则
    js,某元素在浏览器页面浮动/飘动
    前端模块化
    npx create-react-app命令不成功,更改成淘宝镜像
    深入理解jdk和jre(转)
    Java学习路线图·影响一代又一代程序员的经典书籍!(转)
    ZAB协议(转)
  • 原文地址:https://www.cnblogs.com/djx571/p/11105340.html
Copyright © 2011-2022 走看看