zoukankan      html  css  js  c++  java
  • 关于广义后缀树(多串SAM)的总结

    之前我们给的SAM的例题,基本上是一个串建SAM的就能做的

    如果要建多个串的SAM应该怎么做呢

    首先看题,bzoj2780

    我一开始的想法是SA以前的弄法,把串拼起来,中间加分隔符做SAM

    这题确实可以这么做,这样根据SAM能识别所有子串的性质

    而且每个节点都代表了唯一的一个串

    每个询问串我们都能找到最终转移到哪(找不到就是没出现过)

    问在多少个串出现过这就等价于在ST(s)的parent树的子树中,出现了多少种不同的权值

    这显然可以维护dfs序,用经典的离线做法来搞(更好的写法见文末UPD)

      1 type node=record
      2        po,next:longint;
      3      end;
      4 
      5 var go:array[0..300010,1..27] of longint;
      6     d,mx,fa,l,r,p,c,wh,w,fir,next:array[0..300010] of longint;
      7     ans,a,b:array[0..60010] of longint;
      8     e:array[0..300010] of node;
      9     h,t,k,last,len,i,j,n,q,x:longint;
     10     s,ss:ansistring;
     11 
     12 function lowbit(x:longint):longint;
     13   begin
     14     exit(x and (-x));
     15   end;
     16 
     17 function cmp(a,b:longint):boolean;
     18   begin
     19     exit(l[a]<l[b]);
     20   end;
     21 
     22 procedure swap(var a,b:longint);
     23   var c:longint;
     24   begin
     25     c:=a;
     26     a:=b;
     27     b:=c;
     28   end;
     29 
     30 procedure sort(l,r:longint);
     31   var i,j,x:longint;
     32   begin
     33     i:=l;
     34     j:=r;
     35     x:=a[(l+r) shr 1];
     36     repeat
     37       while cmp(a[i],x) do inc(i);
     38       while cmp(x,a[j]) do dec(j);
     39       if not(i>j) then
     40       begin
     41         swap(a[i],a[j]);
     42         swap(b[i],b[j]);
     43         inc(i);
     44         dec(j);
     45       end;
     46     until i>j;
     47     if l<j then sort(l,j);
     48     if i<r then sort(i,r);
     49   end;
     50 
     51 procedure build(x,y:longint);
     52   begin
     53     inc(len);
     54     e[len].po:=y;
     55     e[len].next:=p[x];
     56     p[x]:=len;
     57   end;
     58 
     59 procedure add(c,x:longint);
     60   var p,q,np,nq:longint;
     61   begin
     62     p:=last;
     63     inc(t); last:=t; np:=t;
     64     w[np]:=x; mx[np]:=mx[p]+1;
     65     while (p<>0) and (go[p,c]=0) do
     66     begin
     67       go[p,c]:=np;
     68       p:=fa[p];
     69     end;
     70     if p=0 then fa[np]:=1
     71     else begin
     72       q:=go[p,c];
     73       if mx[q]=mx[p]+1 then fa[np]:=q
     74       else begin
     75         inc(t); nq:=t;
     76         mx[nq]:=mx[p]+1;
     77         go[nq]:=go[q];
     78         fa[nq]:=fa[q];
     79         fa[q]:=nq; fa[np]:=nq;
     80         while go[p,c]=q do
     81         begin
     82           go[p,c]:=nq;
     83           p:=fa[p];
     84         end;
     85       end;
     86     end;
     87   end;
     88 
     89 procedure dfs(x:longint);
     90   var i:longint;
     91   begin
     92     inc(h);
     93     l[x]:=h;
     94     d[h]:=w[x];  //dfs序
     95     i:=p[x];
     96     while i<>0 do
     97     begin
     98       dfs(e[i].po);
     99       i:=e[i].next;
    100     end;
    101     r[x]:=h;
    102   end;
    103 
    104 procedure work(x:longint);
    105   begin
    106     while x<=t do
    107     begin
    108       inc(c[x]);
    109       x:=x+lowbit(x);
    110     end;
    111   end;
    112 
    113 function ask(x:longint):longint;
    114   begin
    115     ask:=0;
    116     while x>0 do
    117     begin
    118       ask:=ask+c[x];
    119       x:=x-lowbit(x);
    120     end;
    121   end;
    122 
    123 begin
    124   readln(n,q);
    125   for i:=1 to n do
    126   begin
    127     readln(ss);
    128     len:=length(ss);
    129     for j:=1 to len do  //拼接
    130     begin
    131       s:=s+ss[j];
    132       inc(t); wh[t]:=i;
    133     end;
    134     if i<>n then
    135     begin
    136       s:=s+chr(97+26);
    137       inc(t);
    138     end;
    139   end;
    140   len:=length(s);
    141   t:=1; last:=1;
    142   for i:=len downto 1 do
    143     add(ord(s[i])-96,wh[i]);
    144 
    145   len:=0;
    146   for i:=2 to t do  //构建树
    147     build(fa[i],i);
    148 
    149   dfs(1);
    150   for i:=1 to q do
    151   begin
    152     readln(s);
    153     j:=1;
    154     len:=length(s);
    155     for k:=len downto 1 do  //每个询问串最终转移到哪
    156     begin
    157       x:=ord(s[k])-96;
    158       if go[j,x]=0 then
    159       begin
    160         j:=0;
    161         break;
    162       end
    163       else j:=go[j,x];
    164     end;
    165     a[i]:=j;
    166     b[i]:=i;
    167   end;
    168   for i:=t downto 2 do  //经典的离线做法
    169   begin
    170     next[i]:=fir[d[i]];
    171     fir[d[i]]:=i;
    172   end;
    173   for i:=1 to n do
    174     if fir[i]<>0 then work(fir[i]);
    175   sort(1,q);
    176   j:=1;
    177   while a[j]=0 do inc(j);
    178   for i:=1 to t do
    179   begin
    180     while (j<=q) and (l[a[j]]=i) do
    181     begin
    182       ans[b[j]]:=ask(r[a[j]])-ask(i-1);
    183       inc(j);
    184     end;
    185     if d[i]<>0 then
    186       if next[i]<>0 then work(next[i]);
    187   end;
    188   for i:=1 to q do
    189     writeln(ans[i]);
    190 end.
    2780

    然后我看到了bzoj3277 3473(双倍经验)

    用我刚才的做法似乎不好搞,因为这题问每个字符串有多少子串出现在至少k个子串中

    而刚才那种拼接,每个节点可接受的子串会搞出一堆不存在的子串

    这时,我膜拜了wyfcyx的构建广义后缀树的做法,显然这里每个串要反序建SAM(这样才能构造出原串的后缀树)

    他的做法是建完一个串的SAM后回到根,对下个串s先匹配

    如果转移到的节点在SAM中可接受的最长串长度=当前匹配s的长度,那么这个节点可以代表s

    否则的话就像SAM一样新开一个节点,感觉和可持久化的思想很像

    这样每个节点可能代表了多个串的子串,并且也没有多出来奇怪的子串

    而且每个节点可接受串出现在多少个串中依然=parent树的子树中,出现了多少种不同的权值

    这样我们可以像刚才一样,求出每个点出现次数,如果大于等于k,

    那么根据之前的性质,这个点p可接受串的长度为[max[fa[p]]+1,max[p]]

    那么点p能做出的贡献即为max[p]-max[fa[p]],否则贡献为0

    由于子串是某个后缀的前缀,所以每个字符串的答案等于所有这个字符串的后缀节点的从根到该节点的权值和

      1 type node=record
      2        po,next:longint;
      3      end;
      4 
      5 var e,w,pr:array[0..400010] of node;
      6     go:array[0..400010,'a'..'z'] of longint;
      7     sa,r,h,q,p,c,d,cur,a,b,mx,fa:array[0..400010] of longint;
      8     g,ans:array[0..400010] of int64;
      9     n,k,l,ti,y,f,i,j,len,x,t,last:longint;
     10     s:ansistring;
     11     ch:char;
     12 
     13 function lowbit(x:longint):longint;
     14   begin
     15     exit(x and (-x));
     16   end;
     17 
     18 procedure swap(var a,b:longint);
     19   var c:longint;
     20   begin
     21     c:=a;
     22     a:=b;
     23     b:=c;
     24   end;
     25 
     26 procedure ins(x,y:longint);
     27   begin
     28     inc(len);
     29     e[len].po:=y;
     30     e[len].next:=p[x];
     31     p[x]:=len;
     32   end;
     33 
     34 procedure add(c:char;x:longint);
     35   var p,q,np,nq:longint;
     36   begin
     37     p:=last;
     38     inc(t); last:=t; np:=t;
     39     mx[np]:=mx[p]+1;
     40     while (p<>0) and (go[p,c]=0) do
     41     begin
     42       go[p,c]:=np;
     43       p:=fa[p];
     44     end;
     45     if p=0 then fa[np]:=1
     46     else begin
     47       q:=go[p,c];
     48       if mx[q]=mx[p]+1 then fa[np]:=q
     49       else begin
     50         inc(t); nq:=t;
     51         mx[nq]:=mx[p]+1;
     52         go[nq]:=go[q];
     53         fa[nq]:=fa[q];
     54         fa[q]:=nq; fa[np]:=nq;
     55         while go[p,c]=q do
     56         begin
     57           go[p,c]:=nq;
     58           p:=fa[p];
     59         end;
     60       end;
     61     end;
     62   end;
     63 
     64 procedure change(c:char);
     65   var p,np,q:longint;
     66   begin
     67     p:=go[last,c];
     68     if mx[p]=mx[last]+1 then last:=p
     69     else begin
     70       inc(t); np:=t;
     71       mx[np]:=mx[last]+1;
     72       go[np]:=go[p];
     73       fa[np]:=fa[p];
     74       fa[p]:=np;
     75       q:=last;
     76       while go[q,c]=p do
     77       begin
     78         go[q,c]:=np;
     79         q:=fa[q];
     80       end;
     81       last:=np;
     82     end;
     83   end;
     84 
     85 procedure dfs(x:longint);
     86   var i:longint;
     87   begin
     88     inc(ti);
     89     sa[ti]:=x;   //dfs序上对应哪个点
     90     b[x]:=ti;
     91     i:=p[x];
     92     while i<>0 do
     93     begin
     94       dfs(e[i].po);
     95       i:=e[i].next;
     96     end;
     97     r[x]:=ti;
     98   end;
     99 
    100 procedure dfss(x:longint);
    101   var i:longint;
    102   begin
    103     g[x]:=g[x]+g[fa[x]];
    104     i:=p[x];
    105     while i<>0 do
    106     begin
    107       dfss(e[i].po);
    108       i:=e[i].next;
    109     end;
    110   end;
    111 
    112 procedure work(x,w:longint);
    113   begin
    114     while x<=t do
    115     begin
    116       inc(c[x],w);
    117       x:=x+lowbit(x);
    118     end;
    119   end;
    120 
    121 function ask(x:longint):longint;
    122   begin
    123     ask:=0;
    124     while x>0 do
    125     begin
    126       ask:=ask+c[x];
    127       x:=x-lowbit(x);
    128     end;
    129   end;
    130 
    131 procedure put(x,y:longint);
    132   begin
    133     inc(len);  //这个串x所有后缀所在的节点
    134     w[len].po:=y;
    135     w[len].next:=q[x];
    136     q[x]:=len;
    137     pr[len].po:=x;  //节点代表了哪些串的后缀
    138     pr[len].next:=h[y];
    139     h[y]:=len;
    140   end;
    141 
    142 function cmp(a,b:longint):boolean;
    143   begin
    144     exit(r[a]<r[b]);
    145   end;
    146 
    147 procedure sort(l,r:longint);
    148   var i,j,x:longint;
    149   begin
    150     i:=l;
    151     j:=r;
    152     x:=a[(l+r) shr 1];
    153     repeat
    154       while cmp(a[i],x) do inc(i);
    155       while cmp(x,a[j]) do dec(j);
    156       if not(i>j) then
    157       begin
    158         swap(a[i],a[j]);
    159         inc(i);
    160         dec(j);
    161       end;
    162     until i>j;
    163     if l<j then sort(l,j);
    164     if i<r then sort(i,r);
    165   end;
    166 
    167 begin
    168   readln(n,k);
    169   last:=1; t:=1;
    170   for i:=1 to n do
    171   begin
    172     readln(s);
    173     l:=length(s); last:=1;
    174     for j:=l downto 1 do
    175     begin
    176       if go[last,s[j]]<>0 then change(s[j]) //广义后缀树
    177       else add(s[j],i);
    178       put(i,last);
    179     end;
    180   end;
    181   len:=0;
    182   for i:=2 to t do
    183     ins(fa[i],i);
    184 
    185   dfs(1);
    186   for i:=1 to t do
    187     a[i]:=i;
    188   sort(1,t);
    189   j:=1; x:=a[1];
    190   for i:=1 to t do
    191   begin
    192     f:=h[sa[i]];
    193     while f<>0 do  //因为一个节点可能代表了多个穿,插入相对麻烦
    194     begin
    195       y:=pr[f].po;
    196       if cur[y]<>0 then work(cur[y],-1);
    197       cur[y]:=i;
    198       work(i,1);
    199       f:=pr[f].next;
    200     end;
    201     while (j<=t) and (r[x]=i) do
    202     begin
    203       len:=ask(i)-ask(b[x]-1);
    204       if len<k then g[x]:=0 else g[x]:=mx[x]-mx[fa[x]];
    205       inc(j); x:=a[j];
    206     end;
    207     if j=t+1 then break;
    208   end;
    209   dfss(1);
    210   for i:=1 to n do
    211   begin
    212     j:=q[i];
    213     while j<>0 do
    214     begin
    215       x:=w[j].po;
    216       ans[i]:=ans[i]+g[x];
    217       j:=w[j].next;
    218     end;
    219   end;
    220   for i:=1 to n do
    221     write(ans[i],' ');
    222   writeln;
    223 end.
    View Code

    bzoj2806 第一道小强和阿米巴的题,解锁新成就

    构造出标准作文库的SAM后,L0不难想到二分答案吧

    然后我们可以求出以询问串每个位置i为结尾的最长子串长度P[i]

    不难得到f[i]到i最长熟悉 f[i]=max(f[i-1],f[j]+i-j) (i-j>=l0 且 (i-j<=P[i])

    然后这个是明显的单调队列优化吧

      1 var go:array[0..1200010*2,'0'..'1'] of longint;
      2     q,v,f:array[0..1200010] of longint;
      3     fa,mx:array[0..1200010*2] of longint;
      4     ans,mid,i,n,m,last,t,l,r,j:longint;
      5     s:ansistring;
      6     c:char;
      7 
      8 function max(a,b:longint):longint;
      9   begin
     10     if a>b then exit(a) else exit(b);
     11   end;
     12 
     13 procedure change(c:char);
     14   var q,p,np:longint;
     15   begin
     16     p:=go[last,c];
     17     if mx[p]=mx[last]+1 then last:=p
     18     else begin
     19       inc(t); np:=t;
     20       mx[np]:=mx[last]+1;
     21       go[np]:=go[p];
     22       fa[np]:=fa[p];
     23       fa[p]:=np;
     24       q:=last;
     25       while go[q,c]=p do
     26       begin
     27         go[q,c]:=np;
     28         q:=fa[q];
     29       end;
     30       last:=np;
     31     end;
     32   end;
     33 
     34 procedure add(c:char);
     35   var p,q,np,nq:longint;
     36   begin
     37     p:=last;
     38     inc(t); last:=t; np:=t;
     39     mx[np]:=mx[p]+1;
     40     while (p<>0) and (go[p,c]=0) do
     41     begin
     42       go[p,c]:=np;
     43       p:=fa[p];
     44     end;
     45     if p=0 then fa[np]:=1
     46     else begin
     47       q:=go[p,c];
     48       if mx[q]=mx[p]+1 then fa[np]:=q
     49       else begin
     50         inc(t); nq:=t;
     51         mx[nq]:=mx[p]+1;
     52         go[nq]:=go[q];
     53         fa[nq]:=fa[q];
     54         fa[q]:=nq; fa[np]:=nq;
     55         while go[p,c]=q do
     56         begin
     57           go[p,c]:=nq;
     58           p:=fa[p];
     59         end;
     60       end;
     61     end;
     62   end;
     63 
     64 procedure match;
     65   var i,j,l,t:longint;
     66   begin
     67     j:=1; t:=0;
     68     l:=length(s);
     69     for i:=1 to l do
     70     begin
     71       if go[j,s[i]]<>0 then
     72       begin
     73         inc(t);
     74         j:=go[j,s[i]];
     75       end
     76       else begin
     77         while (j<>0) and (go[j,s[i]]=0) do j:=fa[j];
     78         if j=0 then
     79         begin
     80           t:=0;
     81           j:=1;
     82         end
     83         else begin
     84           t:=mx[j]+1;
     85           j:=go[j,s[i]];
     86         end;
     87       end;
     88       v[i]:=t;
     89     end;
     90   end;
     91 
     92 function cmp(i,j:longint):boolean;
     93   begin
     94     exit(f[i]-i<f[j]-j);
     95   end;
     96 
     97 function check(l0:longint):boolean;
     98   var h,t,i,n:longint;
     99   begin
    100     n:=length(s);
    101     for i:=0 to l0-1 do
    102       f[i]:=0;
    103     h:=1; t:=0;
    104     for i:=l0 to n do
    105     begin
    106       while (h<=t) and (cmp(q[t],i-l0)) do dec(t);
    107       inc(t);
    108       q[t]:=i-l0;
    109       f[i]:=f[i-1];
    110       while (h<=t) and (q[h]<i-v[i]) do inc(h);
    111       if h<=t then f[i]:=max(f[i],f[q[h]]+i-q[h]);
    112     end;
    113     if f[n]/n>=0.89999999999 then exit(true) else exit(false);
    114   end;
    115 
    116 begin
    117   readln(n,m);
    118   t:=1;
    119   for i:=1 to m do
    120   begin
    121     readln(s);
    122     last:=1;
    123     l:=length(s);
    124     for j:=1 to l do
    125       if go[last,s[j]]<>0 then change(s[j])
    126       else add(s[j]);
    127   end;
    128   for i:=1 to n do
    129   begin
    130     readln(s);
    131     match;
    132     l:=0;
    133     r:=length(s);
    134     while l<=r do
    135     begin
    136       mid:=(l+r) shr 1;
    137       if check(mid) then
    138       begin
    139         ans:=mid;
    140         l:=mid+1;
    141       end
    142       else r:=mid-1;
    143     end;
    144     writeln(ans);
    145   end;
    146 end.
    2806

    UPD:以前写的广义后缀树有点冗长,最近转c++重新写了一份感觉好多了……

    以我之前写的2780为例

      1 #include<iostream>
      2 #include<cstring>
      3 #include<cstdio>
      4 #include<stdlib.h>
      5 #include<algorithm>
      6 #include<vector>
      7 
      8 using namespace std;
      9 vector<int> b[200010],q[10010];
     10 //b[]记录每个节点是哪些串的子串,q[]记录每个串所有后缀所在的节点
     11 struct way{int po,next;} e[200010];
     12 struct node{int w,id;} a[60010];
     13 int ans[60010],w[200010],go[200010][26],fa[200010],mx[200010],l[200010],r[200010],p[200010],c[200010];
     14 int len,t,last,n,m;
     15 char s[100010];
     16 
     17 bool cmp(node a,node b)
     18 {
     19      return l[a.w]<l[b.w];
     20 }
     21 
     22 void work(int c)  //比较优美的写法
     23 {
     24      int np,nq,q,p=last;
     25      if (!go[last][c])
     26      {
     27         np=++t;
     28         mx[np]=mx[p]+1;
     29         for (;p&&!go[p][c];p=fa[p]) go[p][c]=np;
     30      }
     31      else np=0; 
     32      if (!p) fa[np]=1;
     33      else {
     34           q=go[p][c];
     35           if (mx[q]==mx[p]+1) fa[np]=q;
     36           else {
     37                nq=++t; 
     38                mx[nq]=mx[p]+1;
     39                memcpy(go[nq],go[q],sizeof(go[q]));
     40                fa[nq]=fa[q]; fa[q]=fa[np]=nq;
     41                for (;go[p][c]==q;p=fa[p]) go[p][c]=nq;
     42           }
     43      }
     44      last=go[last][c];
     45 }
     46  
     47 void build(int x,int y)
     48 {
     49      e[++len].po=y;
     50      e[len].next=p[x];
     51      p[x]=len;
     52 }   
     53 
     54 void dfs(int x)
     55 {
     56      l[x]=++t; w[t]=x;
     57      for (int i=p[x];i;i=e[i].next)
     58      {
     59          dfs(e[i].po);
     60      }
     61      r[x]=t;
     62 }
     63    
     64 void add(int x,int w)
     65 {
     66      for (int i=x;i<=t;i+=i&-i) c[i]+=w;
     67 }         
     68 
     69 int ask(int x)
     70 {
     71      int s=0;
     72      for (int i=x;i;i-=i&-i) s+=c[i];
     73      return s;
     74 }
     75 int main()
     76 {
     77     scanf("%d%d",&n,&m);
     78     t=last=1;
     79     for (int i=1; i<=n; i++)
     80     {
     81         scanf("%s",s+1); len=strlen(s+1); 
     82         last=1;
     83         for (int j=1; j<=len;j++)
     84         {
     85             work(s[j]-'a');   
     86             b[last].push_back(i);
     87         }         
     88     }
     89     len=fa[0]=0;
     90     for (int i=2; i<=t; i++) build(fa[i],i);
     91     t=0; dfs(1);
     92     for (int i=1; i<=m; i++)
     93     {
     94         scanf("%s",s+1); len=strlen(s+1);
     95         int j=1;
     96         for (int k=1;k<=len;k++)
     97         {
     98             if (!go[j][s[k]-'a']) {j=0;break;}
     99             j=go[j][s[k]-'a'];
    100         }
    101         a[i].w=j; a[i].id=i;
    102     }
    103     sort(a+1,a+1+m,cmp);
    104     vector<int>::iterator k;
    105     for (int i=1;i<=t;i++)
    106         for (k=b[w[i]].begin();k!=b[w[i]].end(); k++) q[*k].push_back(i);
    107     vector<int>::iterator cur[10010];
    108     for (int i=1; i<=n; i++)
    109     {
    110         cur[i]=q[i].begin();
    111         if (cur[i]!=q[i].end()) {add(*cur[i],1); cur[i]++;}
    112     }
    113     int j=1;
    114     while (!a[j].w) j++;  
    115     for (int i=1; i<=t; i++)
    116     {
    117         for (;l[a[j].w]==i; j++) ans[a[j].id]=ask(r[a[j].w])-ask(l[a[j].w]-1);
    118         for (k=b[w[i]].begin();k!=b[w[i]].end(); k++)
    119         {
    120             int x=*k;
    121             if (cur[x]!=q[x].end()) {add(*cur[x],1); cur[x]++;}
    122         }
    123     }        
    124     for (int i=1; i<=m; i++) printf("%d
    ",ans[i]);
    125     system("pause");
    126     return 0;
    127 }
    2780(c++更新版)
  • 相关阅读:
    linux设备和驱动加载的先后顺序
    CCT之CAMERA TUNNING调试学习总结
    MTK Android software Tools工具的说明
    Android开发概要记录
    报表软件公司高价悬赏BUG,100块1个我真是醉了
    OV5640全景模式预览倒180度,拍照正常的问题
    一起学Python:TCP简介
    udp绑定信息
    应用:udp聊天器
    socket简介
  • 原文地址:https://www.cnblogs.com/phile/p/4511571.html
Copyright © 2011-2022 走看看