zoukankan      html  css  js  c++  java
  • [LeetCode] 187. Repeated DNA Sequences 求重复的DNA序列

    All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.

    Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.

    Example:

    Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"
    
    Output: ["AAAAACCCCC", "CCCCCAAAAA"]

    所有的DNA都是由一系列核苷酸组成的,简写为 A、C、G 和 T,比如:"ACGAATTCCG"。当研究DNA时,识别DNA里的重复子序列有时是很有帮助的。写一个函数,找出DNA分子中所有出现超过一次的、长度为10个字母的子序列(子串)。

    解法1:hash table + hash set

    解法2: hash set

    解法3:hash table + bit manipulation

    Java:

    /**
     * Finds every 10-letter substring that occurs more than once in {@code s}.
     *
     * @param s the DNA string to scan; may be {@code null}
     * @return the distinct repeated 10-letter substrings, in no particular order;
     *         an empty list when {@code s} is {@code null} or shorter than 10 characters
     */
    public List<String> findRepeatedDnaSequences(String s) {
        // A string shorter than one full window cannot contain any 10-letter substring.
        if (s == null || s.length() < 10) {
            return new ArrayList<>();
        }
        Set<String> seen = new HashSet<>();
        Set<String> repeated = new HashSet<>();
        for (int i = 0; i + 10 <= s.length(); i++) {
            String window = s.substring(i, i + 10);
            // Set.add returns false when the window was already present,
            // i.e. this is at least its second occurrence.
            if (!seen.add(window)) {
                repeated.add(window);
            }
        }
        return new ArrayList<>(repeated);
    }

    Java: 

    /**
     * Finds every 10-letter substring that occurs more than once in {@code s},
     * using one set for windows seen so far and one for windows seen again.
     *
     * @param s the DNA string to scan; may be {@code null}
     * @return the distinct repeated 10-letter substrings, in no particular order;
     *         an empty list when {@code s} is {@code null} or has no repeated window
     */
    public List<String> findRepeatedDnaSequences(String s) {
        Set<String> seen = new HashSet<>();
        Set<String> repeated = new HashSet<>();
        // Treat a null input like an empty one instead of throwing.
        int length = (s == null) ? 0 : s.length();
        for (int i = 0; i + 9 < length; i++) {
            String ten = s.substring(i, i + 10);
            // add() returns false when the element was already in the set.
            if (!seen.add(ten)) {
                repeated.add(ten);
            }
        }
        return new ArrayList<>(repeated);
    }
    

    Java: hash set + bit manipulation

    /**
     * Finds every 10-letter substring that occurs more than once in {@code s}.
     *
     * <p>Each letter is packed into 2 bits (A=0, C=1, G=2, T=3), so a 10-letter
     * window becomes a 20-bit integer key; only integers are stored in the sets.
     * Other letters in the range 'A'..'Z' are encoded as 0, the same as 'A'.
     *
     * @param s the DNA string to scan
     * @return the repeated 10-letter substrings, each listed once, in the order
     *         in which their second occurrence is encountered
     */
    public List<String> findRepeatedDnaSequences(String s) {
        final int window = 10;

        // Lookup table indexed by (letter - 'A'); unset entries stay 0.
        int[] code = new int[26];
        code['C' - 'A'] = 1;
        code['G' - 'A'] = 2;
        code['T' - 'A'] = 3;

        Set<Integer> seenOnce = new HashSet<>();
        Set<Integer> reported = new HashSet<>();
        List<String> out = new ArrayList<>();

        int lastStart = s.length() - window;
        for (int start = 0; start <= lastStart; start++) {
            // Encode the window beginning at 'start' from scratch.
            int packed = 0;
            for (int k = 0; k < window; k++) {
                packed = (packed << 2) | code[s.charAt(start + k) - 'A'];
            }

            if (seenOnce.add(packed)) {
                continue; // first time this window appears
            }
            // Seen before: emit it only the first time it is found to repeat.
            if (reported.add(packed)) {
                out.add(s.substring(start, start + window));
            }
        }
        return out;
    }

    Python:

    class Solution(object):
        def findRepeatedDnaSequences(self, s):
            """
            Return every 10-letter substring that occurs more than once in s.

            Each letter is reduced to 3 bits with ``ord(c) & 7`` (A=1, C=3, G=7,
            T=4), so a 10-letter window fits in 30 bits and can be updated in
            constant time as the window slides one letter to the right.

            Each repeated substring is reported once, at its second occurrence.

            :type s: str
            :rtype: List[str]
            """
            window = 10
            # Window hash -> True if seen exactly once so far,
            # False once the window has already been reported.
            seen, rolling_hash, res = {}, 0, []

            for i in range(len(s)):
                # Shift the new letter in; the 30-bit mask drops the letter
                # that just left the window.
                rolling_hash = ((rolling_hash << 3) & 0x3fffffff) | (ord(s[i]) & 7)
                if i < window - 1:
                    # Not a full window yet: do not record partial hashes.
                    continue
                if rolling_hash not in seen:
                    seen[rolling_hash] = True
                elif seen[rolling_hash]:
                    res.append(s[i - window + 1: i + 1])
                    seen[rolling_hash] = False
            return res
    

    Python:

    def findRepeatedDnaSequences2(self, s):
        """
        Return every 10-letter substring that occurs more than once in s,
        by counting all 10-letter windows with collections.Counter.

        :type s: str
        :rtype: List[str]
        """
        if len(s) < 10:
            return []
        # Count each window directly; no intermediate list is needed.
        counts = collections.Counter(s[i:i + 10] for i in range(len(s) - 9))
        return [seq for seq, count in counts.items() if count > 1]
    

    C++:

    class Solution {
    public:
        // Returns every 10-letter substring of s that occurs more than once.
        // Each letter is packed into 2 bits (A=0, C=1, G=2, T=3), so a window
        // is a 20-bit key that fits in an int. Each repeated substring is
        // returned once, in the order its second occurrence is found.
        vector<string> findRepeatedDnaSequences(string s) {
            const size_t kLen = 10;
            unordered_set<int> seen;
            unordered_set<int> dup;
            vector<string> result;
            vector<char> m(26);
            m['A' - 'A'] = 0;
            m['C' - 'A'] = 1;
            m['G' - 'A'] = 2;
            m['T' - 'A'] = 3;

            for (size_t i = 0; i + kLen <= s.size(); ++i) {
                int v = 0;
                for (size_t j = i; j < i + kLen; ++j) { // 20 bits < 32 bit int
                    v = (v << 2) | m[s[j] - 'A'];
                }
                // insert().second is false when the key was already present,
                // so this fires exactly once per repeated window: on the
                // first time it is seen again.
                if (!seen.insert(v).second && dup.insert(v).second) {
                    // Copy the substring only when it is actually reported.
                    result.push_back(s.substr(i, kLen));
                }
            }
            return result;
        }
    };
    

      

    All LeetCode Questions List 题目汇总

  • 相关阅读:
    动态tab页
    SQL Server附加数据库拒绝访问
    SQL window身份登陆 SQL server不能登陆
    SQL UPDATE 经典
    Char Varchar Nvarchar区别
    【OpenSSL】创建证书
    makecert 制作数字证书
    cer, pfx 创建,并且读取公钥/密钥,加解密 (C#程序实现)
    CompareValidator ASP控件
    XMLHelper 类
  • 原文地址:https://www.cnblogs.com/lightwindy/p/9770417.html
Copyright © 2011-2022 走看看