入门看的是bilibili上的UESTC的教程,很赞!
建树:字典树的建立方法
构造fail指针:当为根节点的子节点的时候,它的fail指针指向root。
其他情况下,沿着父亲节点的fail指针来更新孩子节点的fail指针。
采用的是BFS
进行匹配查询:
没有失配就沿着字典树一直匹配下去;失配时则跳到当前节点的fail指针继续匹配。
理解得不是很好,目前只会套模板,惭愧。
hdu 2222
题意:给出一组单词,问其中有多少个单词在文本串中出现过。
思路:模板题。因为查询的是文本串中出现了多少个给定的单词,相当于只关心种类,不关心某一种单词在文本串中出现的次数,所以某个节点统计过之后要记得将它的cnt置为0,避免重复计数。
#include<bits/stdc++.h>
using namespace std;
// Upper bound on the number of automaton nodes; sizes Aho::node.
const int maxn = 5e5 + 10;
// Capacity of the input buffer used for patterns and the text.
const int maxm = 1e6 + 10;
typedef long long ll;
// Fills the whole array x with byte value y (x must be an array, not a pointer).
#define clr(x,y) memset(x,y,sizeof x)
#define INF 0x3f3f3f3f
const ll Mod = 1e9 + 7;
typedef pair<int,int> P;
typedef unsigned long long ull;

/**
 * Aho-Corasick automaton over the 26 letters 'a'..'z'.
 *
 * Counts how many inserted patterns occur in a text. Every inserted pattern
 * is counted at most once per automaton, because get() zeroes the counters
 * it collects.
 *
 * Usage: Init(), inserts() for each pattern, build(), then match().
 * Node 0 is the root, so a child index of 0 means "no transition".
 * Characters outside 'a'..'z' are not validated.
 */
struct Aho
{
    queue<int>q;            // BFS frontier used by build()
    struct Node
    {
        int next[26];       // next[c]: child reached by letter 'a' + c, 0 if absent
        int fail,cnt;       // fail link (-1 for the root once built); patterns ending here
    }node[maxn];
    int siz = 1;            // number of nodes in use, root included

    // Puts node u back into the "no children, no patterns" state.
    void reset(int u)
    {
        clr(node[u].next,0);
        node[u].fail = node[u].cnt = 0;
    }

    // Empties the automaton. Only the root is cleared here; every other node
    // is cleared when inserts() allocates it, so a reset does not have to
    // touch all maxn nodes for every test case.
    void Init()
    {
        while(!q.empty())q.pop();
        reset(0);
        siz = 1;
    }

    // Adds the NUL-terminated pattern s to the trie. Inserting the same
    // pattern twice makes it count twice.
    void inserts(char *s)
    {
        int now = 0;
        for(const char *c = s;*c;++ c)
        {
            const int x = *c - 'a';
            if(!node[now].next[x])
            {
                reset(siz);
                node[now].next[x] = siz ++;
            }
            now = node[now].next[x];
        }
        node[now].cnt ++;
    }

    // Computes all fail links with a BFS from the root. Must run after the
    // last inserts() and before match().
    void build()
    {
        node[0].fail = -1;  // sentinel: lets the fail-chain walks terminate
        q.push(0);
        while(!q.empty())
        {
            const int u = q.front();q.pop();
            for(int i = 0;i < 26;i ++)
            {
                const int child = node[u].next[i];
                if(!child)continue;
                // Default: fall back to the root. Children of the root always do.
                int link = 0;
                if(u != 0)
                {
                    // First state on the parent's fail chain that has an
                    // i-transition supplies the child's fail link.
                    for(int v = node[u].fail;v != -1;v = node[v].fail)
                    {
                        if(node[v].next[i])
                        {
                            link = node[v].next[i];
                            break;
                        }
                    }
                }
                node[child].fail = link;
                q.push(child);
            }
        }
    }

    // Collects and clears the pattern counters of u and of every node on its
    // fail chain (the root excluded). Returns the collected total.
    int get(int u)
    {
        int ret = 0;
        for(;u;u = node[u].fail)
        {
            ret += node[u].cnt;
            node[u].cnt = 0;
        }
        return ret;
    }

    // Scans the NUL-terminated text s and returns how many inserted patterns
    // occur in it. Consumes the counters, so a second call returns 0 for
    // patterns that were already found.
    int match(char *s)
    {
        int now = 0,ret = 0;
        for(const char *c = s;*c;++ c)
        {
            const int x = *c - 'a';
            // Follow fail links until some state has an x-transition.
            int p = now;
            while(p != -1 && node[p].next[x] == 0)p = node[p].fail;
            now = (p == -1) ? 0 : node[p].next[x];
            // Always walk the fail chain: a pattern may end at a suffix state
            // even when the current state itself ends no pattern. Every fail
            // hop leads to a strictly shallower node, so the walk is bounded
            // by the longest pattern length.
            ret += get(now);
        }
        return ret;
    }
}aho;
// Shared input buffer: holds each pattern in turn, then the text.
char s[maxm];

// Reads the number of test cases; for each one reads n patterns and a text,
// then prints how many of the patterns occur in the text.
int main()
{
    int Tcase;
    scanf("%d",&Tcase);
    for(;Tcase != 0;Tcase --)
    {
        aho.Init();

        int n;
        scanf("%d",&n);
        for(int k = 0;k < n;++ k)
        {
            scanf("%s",s);
            aho.inserts(s);
        }
        aho.build();

        scanf("%s",s);
        const int found = aho.match(s);
        printf("%d\n",found);
    }
    return 0;
}
nyoj 1085 数单词
题意:找出在文本串中出现次数最多的单词,并按照输入顺序输出它们;输入的单词中可能有重复。
思路:建树时注意不要把重复的单词再次插入字典树,并且在每个单词的最后一个节点上做标记。因为这里关心的是每个单词出现的次数,所以查询时需要把所有单词的出现次数都统计出来。
#include<bits/stdc++.h>
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// Upper bound on trie nodes; also sizes the per-word counter array cnts.
const int maxn = 4e4 + 10;
// Capacity of the text buffer.
const int maxm = 1e6 + 10;
typedef long long ll;
#define clr(x,y) memset(x,y,sizeof x)
#define INF 0x3f3f3f3f
#define IINF 0x3f3f3f3f3f3f3f3f
const ll Mod = 1e9 + 7;
typedef pair<ll,int> P;
typedef unsigned long long ull;
char str[255][150];     // input words, indexed from 1
int cnts[maxn];         // cnts[id]: occurrences found for the word with that id
map<string,int>ms;      // per-word flag used by main() for de-duplication/output

// Aho-Corasick automaton over 'a'..'z' that tallies, per word id, how often
// each inserted word occurs in a text. Node 0 is the root, so a child index
// of 0 means "no transition".
struct Aho
{
    int siz;                // number of nodes in use, root included
    queue<int>q;            // BFS frontier for build()
    struct Node
    {
        int next[26];       // child per letter, 0 if absent
        int fail,cnt,id;    // fail link; words ending here; id of that word
    }node[maxn];

    // Clears every node and leaves only the root allocated.
    void Init()
    {
        while(!q.empty())q.pop();
        // Every field is an int that must become 0, so one memset covers all nodes.
        memset(node,0,sizeof node);
        siz = 1;
    }

    // Inserts the NUL-terminated word s and tags its final node with id pp.
    void inserts(char *s,int pp)
    {
        int cur = 0;
        for(const char *c = s;*c != '\0';++ c)
        {
            int &slot = node[cur].next[*c - 'a'];
            if(slot == 0)slot = siz ++;
            cur = slot;
        }
        Node &tail = node[cur];
        ++ tail.cnt;
        tail.id = pp;
    }

    // Breadth-first computation of the fail links; the root gets -1 as sentinel.
    void build()
    {
        node[0].fail = -1;
        q.push(0);
        while(!q.empty())
        {
            const int parent = q.front();
            q.pop();
            for(int c = 0;c < 26;++ c)
            {
                const int child = node[parent].next[c];
                if(child == 0)continue;
                int link = 0;   // root unless a longer suffix state is found
                if(parent != 0)
                {
                    for(int f = node[parent].fail;f != -1;f = node[f].fail)
                    {
                        if(node[f].next[c] != 0)
                        {
                            link = node[f].next[c];
                            break;
                        }
                    }
                }
                node[child].fail = link;
                q.push(child);
            }
        }
    }

    // Adds the word count of u and of every node on its fail chain to cnts.
    void fun(int u)
    {
        for(;u != 0;u = node[u].fail)
            cnts[node[u].id] += node[u].cnt;
    }

    // Runs the text s through the automaton, tallying matches into cnts.
    void query(char * s)
    {
        int state = 0;
        for(const char *c = s;*c != '\0';++ c)
        {
            const int x = *c - 'a';
            int p = state;
            while(p != -1 && node[p].next[x] == 0)p = node[p].fail;
            state = (p == -1) ? 0 : node[p].next[x];
            fun(state);
        }
    }
}aho;
// Text buffer for the string being searched.
char s[maxm];
int ans[maxn];

// For each test case: reads n words (inserting each distinct word once) and a
// text, prints the highest occurrence count, then prints the flagged words in
// input order.
int main()
{
    int Tcase;
    scanf("%d",&Tcase);
    for(;Tcase != 0;Tcase --)
    {
        aho.Init();
        memset(cnts,0,sizeof cnts);
        ms.clear();

        int n;
        scanf("%d",&n);
        for(int i = 1;i <= n;i ++)
        {
            scanf("%s",str[i]);
            int &flag = ms[str[i]];
            if(flag != 0)continue;  // word already seen: keep only its first index
            flag = -1;
            aho.inserts(str[i],i);
        }
        aho.build();

        scanf("%s",s);
        aho.query(s);

        int maxs = 0;
        for(int i = 1;i <= n;i ++)
        {
            if(cnts[i] > maxs)maxs = cnts[i];
        }
        printf("%d\n",maxs);

        // Flag every word whose index reached the maximum...
        for(int i = 1;i <= n;i ++)
        {
            if(cnts[i] == maxs)ms[str[i]] = 1;
        }
        // ...then print every input line whose word is flagged.
        for(int i = 1;i <= n;i ++)
        {
            if(ms[str[i]] <= 0)continue;
            printf("%s\n",str[i]);
        }
    }
    return 0;
}
hdu 2896
这题最大的问题是MLE(内存超限),一直在调整数组的大小。
#include<bits/stdc++.h>
using namespace std;
// Fills the whole array x with byte value y (x must be an array, not a pointer).
#define clr(x,y) memset(x,y,sizeof x)
// Upper bound on trie nodes; sizes Aho::node.
const int maxn = 6e4 + 10;
vector<int>ans;     // ids of the patterns found in the text being scanned
bool vis[510];      // vis[id]: pattern id is already in ans

/**
 * Aho-Corasick automaton over 7-bit characters (128 transitions per node)
 * that reports which inserted pattern ids occur in a text.
 *
 * Usage: Init(), Insert() for each pattern, build(), then match() per text
 * after the caller has cleared ans and vis.
 * Node 0 is the root, so a child index of 0 means "no transition"; pattern
 * id 0 means "no pattern ends here".
 * NOTE(review): nothing checks that siz stays below maxn.
 */
struct Aho
{
    struct Node
    {
        int next[128];      // child per character code, 0 if absent
        int fail,cnt,id;    // fail link; patterns ending here; id of that pattern
    }node[maxn];
    int siz;                // number of nodes in use, root included
    queue<int>q;            // BFS frontier for build()

    // Maps a raw char to its transition index, or -1 when the byte does not
    // fit the 128-entry table. Going through unsigned char keeps bytes
    // >= 0x80 from turning into negative array indices where char is signed.
    static int code(char c)
    {
        const unsigned char b = static_cast<unsigned char>(c);
        return b < 128 ? static_cast<int>(b) : -1;
    }

    // Puts node u back into the "no children, no pattern" state.
    void reset(int u)
    {
        clr(node[u].next,0);
        node[u].id = node[u].fail = node[u].cnt = 0;
    }

    // Empties the automaton. Only the root is cleared here; other nodes are
    // cleared when Insert() allocates them, so the whole array is not wiped
    // for every dataset.
    void Init()
    {
        siz = 1;
        while(!q.empty())q.pop();
        reset(0);
    }

    // Inserts the NUL-terminated pattern s with id pp. A pattern containing a
    // byte outside 0..127 cannot be represented and is ignored entirely.
    void Insert(char *s,int pp)
    {
        for(const char *c = s;*c;++ c)
        {
            if(code(*c) < 0)return;
        }
        int now = 0;
        for(const char *c = s;*c;++ c)
        {
            const int x = code(*c);
            if(node[now].next[x] == 0)
            {
                reset(siz);
                node[now].next[x] = siz ++;
            }
            now = node[now].next[x];
        }
        node[now].cnt ++;
        node[now].id = pp;
    }

    // Computes all fail links with a BFS from the root. Must run after the
    // last Insert() and before match().
    void build()
    {
        node[0].fail = -1;  // sentinel: lets the fail-chain walks terminate
        q.push(0);
        while(!q.empty())
        {
            const int u = q.front();q.pop();
            for(int i = 0;i < 128;i ++)
            {
                const int child = node[u].next[i];
                if(child == 0)continue;
                // Default: fall back to the root. The link is written on the
                // child node itself, never on node[i].
                int link = 0;
                if(u != 0)
                {
                    for(int p = node[u].fail;p != -1;p = node[p].fail)
                    {
                        if(node[p].next[i])
                        {
                            link = node[p].next[i];
                            break;
                        }
                    }
                }
                node[child].fail = link;
                q.push(child);
            }
        }
    }

    // Records every not-yet-seen pattern id found on the fail chain of u.
    void fun(int u)
    {
        for(;u;u = node[u].fail)
        {
            const int id = node[u].id;
            if(id && !vis[id])
            {
                ans.push_back(id);
                vis[id] = true;
            }
        }
    }

    // Scans the NUL-terminated text s and appends the ids of all patterns
    // occurring in it to ans (each id once, tracked through vis).
    void match(char *s)
    {
        int now = 0;
        for(const char *c = s;*c;++ c)
        {
            const int x = code(*c);
            if(x < 0)
            {
                // No stored pattern contains this byte: restart from the root.
                now = 0;
                continue;
            }
            int p = now;
            while(p != -1 && node[p].next[x] == 0)p = node[p].fail;
            now = (p == -1) ? 0 : node[p].next[x];
            // If this node's own pattern is already recorded, its whole fail
            // chain was walked at that time, so the walk can be skipped.
            if(!vis[node[now].id])
                fun(now);
        }
    }
}aho;
int n,m;
// Input buffer for one pattern or one page; scanf is limited to 10009
// characters so an over-long token cannot overrun it.
char s[10010];

// For each dataset: reads n patterns (ids 1..n) and m pages, prints the sorted
// ids of the patterns found in every page that contains at least one, then the
// number of such pages. Stops at end of input or on the first malformed read.
int main()
{
    // "== 1" rather than "~scanf(...)": on non-numeric input scanf returns 0,
    // which the bitwise-not test treats as success and would loop forever.
    while(scanf("%d",&n) == 1)
    {
        aho.Init();
        for(int i = 1;i <= n;i ++)
        {
            if(scanf("%10009s",s) != 1)return 0;
            aho.Insert(s,i);
        }
        aho.build();

        if(scanf("%d",&m) != 1)return 0;
        int cnt = 0;
        for(int i = 1;i <= m;i ++)
        {
            // Per-page state: found ids and their "already reported" flags.
            ans.clear();
            clr(vis,false);
            if(scanf("%10009s",s) != 1)return 0;
            aho.match(s);
            if(ans.empty())continue;

            sort(ans.begin(),ans.end());
            printf("web %d:",i);
            for(size_t k = 0;k < ans.size();k ++)
                printf(" %d",ans[k]);
            puts("");
            cnt ++;
        }
        printf("total: %d\n",cnt);
    }
    return 0;
}