DNA Sequence
| Time Limit: 1000MS | Memory Limit: 65536K | |
| Total Submissions: 14426 | Accepted: 5572 |
Description
It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to count how many kinds of DNA sequences of a species don't contain any of those segments.
Suppose that the DNA sequences of a species consist of A, C, T and G, and that the length of the sequences is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
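/*
poj 2778 DNA Sequence (Aho-Corasick automaton + fast matrix exponentiation)
A string Str of length n is made up of A, T, G and C, and m substrings are given.
Count how many such strings Str contain none of those substrings.
Building the Aho-Corasick automaton yields a transition graph. Convert that graph
into a matrix; fast matrix exponentiation then gives the number of ways to get
from state a to state b.
Roughly: every character that does not continue a substring leads back towards
the root node, i.e. we keep searching for the substrings while walking the graph.
So we only have to make sure the path never passes through a dangerous node
marked by ed (the last node of each substring).
For example, with the patterns agcd and c, suppose I walk the path a-g-c-d.

            root
           /    \
          a      c
         /
        g
       /
      c
     /
    d

From the picture above, the d on the left and the c on the right are both
dangerous nodes, but the c on the left branch has been missed.
Therefore, if the node that a fail pointer points to is dangerous, the current
node is dangerous as well.
Aho-Corasick automaton: http://blog.csdn.net/niushuai666/article/details/7002823
hhh-2016-04-23 15:59:53
*/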
#include <iostream>
#include <vector>
#include <cstring>
#include <string>
#include <cstdio>
#include <queue>
#include <functional>
#include <map>
using namespace std;
// Expand to 2*i and 2*i+1 for whatever variable `i` is in scope at the point
// of use; neither macro is referenced in the code visible in this file.
#define lson (i<<1)
#define rson ((i<<1)|1)
// Shorthand for the 64-bit integer type used for products and the exponent.
typedef long long ll;
// Not referenced in the code visible in this file.
const int maxn = 40010;
// Modulus applied to every matrix entry and to the final answer.
const int mod = 100000;
/**
 * Fixed-capacity square matrix of ints.
 *
 * Storage is always 105 x 105; only the top-left len x len corner is
 * meaningful to the functions that operate on a Matrix.
 */
struct Matrix
{
    int len;           // side length of the used sub-matrix
    int ma[105][105];  // entries, indexed ma[row][column]

    // Empty matrix: len is 0 and every entry is zero, so no member is ever
    // read while still indeterminate.
    Matrix() : len(0)
    {
        std::memset(ma, 0, sizeof(ma));
    }

    // Zero matrix whose used side length is L.
    // Kept non-explicit so existing implicit uses continue to compile.
    Matrix(int L) : len(L)
    {
        std::memset(ma, 0, sizeof(ma));
    }
};
/**
 * Multiplies two square matrices modulo `mod`.
 *
 * Only the top-left ta.len x ta.len corner of each operand is read, and the
 * result carries the same len. Both operands are taken by const reference so
 * that a call no longer copies two full 105 x 105 arrays.
 *
 * Entries are expected to be non-negative (as produced by this program, where
 * everything is kept in [0, mod)); under that condition the result is the
 * same as reducing after every single addition.
 *
 * @param ta left operand
 * @param tb right operand (its len is not consulted)
 * @return ta * tb with every entry reduced modulo `mod`
 */
Matrix mult(const Matrix& ta,const Matrix& tb)
{
    Matrix tc;
    tc.len = ta.len;
    for(int i = 0; i < ta.len; i++)
    {
        for(int j = 0; j < ta.len; j++)
        {
            // Each term is reduced before it is added, so the running sum is
            // bounded by len * mod and cannot overflow 64 bits.
            ll acc = 0;
            for(int k = 0; k < ta.len; k++)
                acc += static_cast<ll>(ta.ma[i][k])*tb.ma[k][j]%mod;
            tc.ma[i][j] = static_cast<int>(acc%mod);
        }
    }
    return tc;
}
/**
 * Raises a square matrix to the n-th power by repeated squaring, using mult()
 * for every product.
 *
 * @param a base matrix; taken by value because it is squared in place
 * @param n exponent; n == 0 yields the identity matrix of side a.len
 * @return a to the power n
 */
Matrix pow_mat(Matrix a,ll n)
{
    // Start from the identity of the same side length as the base.
    Matrix result;
    result.len = a.len;
    memset(result.ma, 0, sizeof(result.ma));
    for (int d = 0; d < result.len; ++d)
    {
        result.ma[d][d] = 1;
    }

    // Consume the exponent one bit at a time, lowest bit first.
    for (; n; n >>= 1)
    {
        if (n & 1)
        {
            result = mult(result, a);
        }
        a = mult(a, a);
    }
    return result;
}
/**
 * Aho-Corasick automaton over the alphabet {A, C, T, G}.
 *
 * Usage order: ini(), then inser() once per pattern, then build(), then
 * to_mat(). Capacity is fixed at 105 states; the problem limits (at most 10
 * patterns of at most 10 characters) need at most 101.
 */
struct Tire
{
    int nex[105][4];  // nex[s][c]: child / transition of state s on letter c, -1 if absent before build()
    int fail[105];    // failure link of each state, filled in by build()
    int ed[105];      // non-zero when the state is "dangerous" (a pattern ends here or on its fail chain)
    int root,L;       // root state index and number of states allocated so far

    // Allocates a fresh state with no children and returns its index.
    int newnode()
    {
        for(int i = 0; i < 4; i++)
            nex[L][i] = -1;
        ed[L++] = 0;
        return L-1;
    }

    // Resets the automaton to a single root state.
    void ini()
    {
        L = 0,root = newnode();
    }

    // Maps a nucleotide letter to its column index 0..3.
    // Returns -1 for any other character; previously control fell off the end
    // of the function in that case, which is undefined behaviour.
    int cha(char x)
    {
        switch(x)
        {
        case 'A':
            return 0;
        case 'C':
            return 1;
        case 'T':
            return 2;
        case 'G':
            return 3;
        default:
            return -1;
        }
    }

    // Inserts one pattern into the trie and marks its last state as dangerous.
    // A pattern containing a character outside A/C/T/G is ignored entirely, so
    // nex is never indexed with an invalid column and no partial path is left
    // behind.
    void inser(const char buf[])
    {
        int len = static_cast<int>(std::strlen(buf));
        for(int i = 0; i < len; i++)
            if(cha(buf[i]) < 0)
                return;

        int now = root;
        for(int i = 0; i < len; i++)
        {
            int ta = cha(buf[i]);
            if(nex[now][ta] == -1)
                nex[now][ta] = newnode();
            now = nex[now][ta];
        }
        ed[now]++;
    }

    // Computes failure links breadth-first and replaces every missing child
    // with the transition of the failure state, so that nex becomes a complete
    // transition table. Danger marks are propagated along failure links.
    void build()
    {
        std::queue<int> q;
        fail[root] = root;
        for(int i = 0; i < 4; i++)
        {
            if(nex[root][i] == -1)
                nex[root][i] = root;
            else
            {
                fail[nex[root][i]] = root;
                q.push(nex[root][i]);
            }
        }
        while(!q.empty())
        {
            int now = q.front();
            q.pop();
            // The failure state was dequeued earlier, so its mark is final:
            // if a pattern ends there, a pattern also ends at this state.
            if(ed[fail[now]])
                ed[now] = 1;
            for(int i = 0; i < 4; i++)
            {
                if(nex[now][i] == -1)
                    nex[now][i] = nex[fail[now]][i];
                else
                {
                    fail[nex[now][i]] = nex[fail[now]][i];
                    q.push(nex[now][i]);
                }
            }
        }
    }

    // Builds the L x L matrix whose entry [i][j] counts the letters that move
    // state i to state j, leaving out every transition into a dangerous state.
    // Must be called after build().
    Matrix to_mat()
    {
        Matrix ta(L);
        std::memset(ta.ma,0,sizeof(ta.ma));
        for(int i = 0; i < L; i++)
        {
            for(int j = 0; j < 4; j++)
                if(!ed[nex[i][j]])
                    ta.ma[i][nex[i][j]]++;
        }
        return ta;
    }
};
// Single automaton instance; main() re-initialises it for every test case.
Tire ac;
// Input buffer for one disease segment (the problem statement limits a
// segment to 10 characters).
char buf[20];
/**
 * Reads test cases until input is exhausted. For each case it builds the
 * automaton from the m forbidden segments, raises its transition matrix to
 * the n-th power and prints the sum of row 0 modulo `mod`.
 */
int main()
{
    int m;
    ll n;
    // Stop on EOF and on malformed input alike: both values must be read.
    // %lld is the standard conversion for long long.
    while(scanf("%d%lld",&m,&n) == 2)
    {
        ac.ini();
        for(int i = 0; i < m; i++)
        {
            // The field width keeps an over-long token from overflowing buf.
            if(scanf("%19s",buf) != 1)
                return 0;  // truncated input: nothing sensible left to compute
            ac.inser(buf);
        }
        ac.build();
        Matrix ta = ac.to_mat();
        ta = pow_mat(ta,n);
        // Row 0 belongs to the root state, so it holds the number of safe
        // walks of length n that end in each state.
        int ans = 0;
        for(int i = 0; i < ta.len; i++)
        {
            ans = (ans+ta.ma[0][i])%mod;
        }
        printf("%d\n",ans);
    }
    return 0;
}