zoukankan      html  css  js  c++  java
  • 【POJ2778】DNA Sequence

    Description

    It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

    Suppose that a DNA sequence of a species is a sequence that consists of A, C, T and G, and the length of the sequence is a given integer n.

    Input

    The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

    Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.

    Output

    An integer, the number of DNA sequences, mod 100000.

    Sample Input

    4 3

    AT

    AC

    AG

    AA

    Sample Output

    36


    题意简单来说,就是【POJ1625】检查Censored!加了矩阵乘法优化


    思路:

    AC自动机+DP,处理失败指针(fail)时继承失败指针的标记(flag)

    trie[v].flag|=trie[trie[trie[u].fail].ch[i]].flag;
    

    逐位DP的复杂度是O(n),而n最大可达2×10^9,太慢,所以用矩阵快速幂优化

    注意:

    结构体初始化要清零;矩阵乘法每一步都要取模,而且两个小于100000的数相乘可达约10^10,会超出int,所以矩阵元素要用long long

    代码:

    #include <iostream>
    #include <cstdio>
    #include <queue>
    #include <cstring>
    #define mod 100000
    using namespace std;
    // File-scope state shared by the routines below (zero-initialized as globals).
    int m;   // number of forbidden segments; read from input in main
    int n;   // sequence length; main uses it as the matrix exponent
    int cnt; // index of the most recently allocated trie node (node 0 is the root)
    // Square matrix indexed by automaton state. Entries are long long so the
    // product of two residues can be formed before it is reduced.
    struct node{
    	long long e[105][105];
    	// Set every entry of the matrix to zero.
    	void clear()
    	{
    		const int dim=sizeof(e)/sizeof(e[0]);
    		for(int r=0;r<dim;++r)
    			for(int c=0;c<dim;++c)
    				e[r][c]=0;
    	}
    }sequ,ans; // sequ: one-step transition counts built in main; ans: accumulated power of sequ
    // Map a nucleotide letter to its child index in a trie node:
    // 'A' -> 0, 'C' -> 1, 'T' -> 2, 'G' -> 3.
    // Any other character yields -1. Previously control fell off the end of
    // this non-void function for such input, which is undefined behaviour.
    int rflag(char a)
    {
    	switch(a)
    	{
    		case 'A': return 0;
    		case 'C': return 1;
    		case 'T': return 2;
    		case 'G': return 3;
    		default:  return -1; // not one of the four letters
    	}
    }
    // One node of the pattern trie / automaton.
    struct fdfdfd{
    	int flag;  // nonzero when this state is forbidden (set by insert, inherited in getfail)
    	int fail;  // index of the node this state falls back to
    	int ch[5]; // child / transition targets; the visible code only uses slots 0..3
    	// Reset the node: no flag, fail link to node 0, no children.
    	void clear()
    	{
    		const int slots=sizeof(ch)/sizeof(ch[0]);
    		for(int k=0;k<slots;++k) ch[k]=0;
    		fail=0;
    		flag=0;
    	}
    }trie[105];
    // Add one forbidden segment to the trie rooted at node 0.
    //   a   - characters of the segment
    //   len - number of characters of a to insert
    // New nodes are allocated by advancing the global cnt, and the node reached
    // by the last character is marked with flag=1.
    // A segment containing a character that rflag does not map to 0..3 is
    // skipped entirely: it cannot occur in a sequence over A/C/T/G, and using
    // an out-of-range value as an index into ch[] would write outside the array.
    void insert(char a[],int len)
    {
    	// Validate first so a rejected segment leaves no partial path behind.
    	for(int i=0;i<len;++i)
    	{
    		int idx=rflag(a[i]);
    		if(idx<0||idx>3) return;
    	}
    	int cur=0;
    	for(int i=0;i<len;++i)
    	{
    		int j=rflag(a[i]);
    		if(!trie[cur].ch[j])
    		{
    			// Reset the slot before linking it in as a new child.
    			trie[cnt+1].clear();
    			trie[cur].ch[j]=++cnt;
    		}
    		cur=trie[cur].ch[j];
    	}
    	trie[cur].flag=1;
    }
    // Breadth-first pass over the trie that
    //   - sets the fail link of every node below depth 1,
    //   - ORs into each node the flag of its fail target, and
    //   - replaces every missing child with the fail target's transition, so
    //     afterwards ch[0..3] of each visited node is a complete transition row.
    void getfail()
    {
    	std::queue<int> pending;
    	// Children of the root keep the fail=0 that clear() gave them.
    	for(int c=0;c<4;++c)
    	{
    		int child=trie[0].ch[c];
    		if(child) pending.push(child);
    	}
    	while(!pending.empty())
    	{
    		int u=pending.front();
    		pending.pop();
    		for(int c=0;c<4;++c)
    		{
    			int v=trie[u].ch[c];
    			// Where u's fail node goes on the same letter.
    			int viaFail=trie[trie[u].fail].ch[c];
    			if(v)
    			{
    				trie[v].fail=viaFail;
    				// A state is forbidden if its fail target is forbidden.
    				trie[v].flag|=trie[viaFail].flag;
    				pending.push(v);
    			}
    			else
    			{
    				trie[u].ch[c]=viaFail;
    			}
    		}
    	}
    }
    // Matrix product a*b restricted to indices 0..cnt, with every entry
    // reduced modulo `mod`. Entries outside that range are left at zero.
    // The result is built in a separate matrix, so a and b may be the same object.
    node Mul(node &a,node &b)
    {
    	node prod;
    	prod.clear();
    	for(int r=0;r<=cnt;++r)
    	{
    		for(int c=0;c<=cnt;++c)
    		{
    			long long acc=0;
    			for(int k=0;k<=cnt;++k)
    			{
    				// Reduce each term and the running sum to keep values small.
    				acc+=a.e[r][k]*b.e[k][c]%mod;
    				acc%=mod;
    			}
    			prod.e[r][c]=acc;
    		}
    	}
    	return prod;
    }
    // Reads m and n, then m forbidden segments; builds the automaton, raises
    // its transition-count matrix to the n-th power and prints the number of
    // length-n walks from the root that avoid flagged states, modulo `mod`.
    int main()
    {
    	// Nothing to compute if the header line cannot be parsed.
    	if(scanf("%d%d",&m,&n)!=2) return 0;
    	trie[0].clear();
    	for(int i=1;i<=m;++i)
    	{
    		char temp[12];
    		// %s needs a char*, so pass the array itself rather than &temp;
    		// the width 11 keeps the read inside the 12-byte buffer.
    		if(scanf("%11s",temp)!=1) break;
    		insert(temp,static_cast<int>(strlen(temp)));
    	}
    	getfail();
    	ans.clear(); sequ.clear();
    	// sequ.e[i][j] = number of letters that move state i to state j,
    	// counting only moves where neither state is flagged.
    	for(int i=0;i<=cnt;++i)
    		for(int j=0;j<4;++j)
    			if(!trie[i].flag&&!trie[trie[i].ch[j]].flag) ++sequ.e[i][trie[i].ch[j]];
    	// ans starts as the identity matrix.
    	for(int i=0;i<=cnt;++i) ans.e[i][i]=1;
    	// Binary exponentiation: afterwards ans holds sequ raised to the original n.
    	for(;n;n>>=1)
    	{
    		if(n&1) ans=Mul(ans,sequ);
    		sequ=Mul(sequ,sequ);
    	}
    	// Row 0 holds the walks that start at the root; add up all end states.
    	long long sum=0;
    	for(int i=0;i<=cnt;++i) sum=(sum+ans.e[0][i])%mod;
    	// The format string must be one literal ending in the \n escape; a raw
    	// line break inside the quotes does not compile.
    	printf("%lld\n",sum);
    	return 0;
    }
    
  • 相关阅读:
    Installing Apache Spark on Ubuntu 16.04
    基尼系数(Gini coefficient),洛伦茨系数
    非平衡数据机器学习
    FCM聚类算法介绍
    基于大数据技术的手机用户画像与征信研究
    归一化方法 Normalization Method
    区块链(Blockchain)
    统计抽样
    动态规划 Dynamic Programming
    LTE中的各种ID含义
  • 原文地址:https://www.cnblogs.com/wuwendongxi/p/13215944.html
Copyright © 2011-2022 走看看