近日复习了一下AC自动机,顺便把以前的指针写法改成了数组写法,并删掉了以前的AC自动机相关博文(代码库2333)。原本的指针写法实在太长,而且内存降不下来,随随便便就MLE。
教程可以看 别人的博客
具体写法各异,以下说明针对下面代码的写法,这种写法对数组的破坏性比较大。
插入到trie树
next[cur][k]:cur号节点的k号子节点的节点编号(不存在时为-1)。有了这个数组,插入字符串就很简单:
void insert(char *s1) {
int len = strlen(s1);
int cur = root;
for (int i = 0; i < len; ++i) {
int id = s1[i] - beginCh;
if (next[cur][id] == -1) {
next[cur][id] = newNode();
}
cur = next[cur][id];
}
leaf[cur] = true;
}
构造失败指针
对于每个父节点u,其k号子节点的fail值 = fail[父节点u] 对应点的k号子节点,即 next[fail[u]][k]。若k号子节点不存在,则把该k号位置的next值
直接改为 next[fail[u]][k](注意是改了next值本身,详见代码)。
root点特殊处理:它的子节点,若不存在则修改next为root,若存在则修改失败指针为root,且入队列。
void build() {
queue<int>Q;
fail[root] = root;
for (int i = 0; i < maxch; ++i) {
if (next[root][i] == -1) next[root][i] = root;
else fail[next[root][i]] = root, Q.push(next[root][i]); //存在则指定fail到root
}
while (!Q.empty()) {
int cur = Q.front(); Q.pop();
for (int i = 0; i < maxch; ++i) {
if (next[cur][i] == -1) next[cur][i] = next[fail[cur]][i];
else {
fail[next[cur][i]] = next[fail[cur]][i];
Q.push(next[cur][i]);
}
}
}
}
查询
查询变化多,根据具体情况改,这里以在文本串里查找模式串集合字符串出现个数为例。
// Scans the text s1 and returns the sum of leaf[] over the pattern-end nodes
// reached; each counted node has its leaf value cleared so it is counted once.
// The fail-chain walk stops at the first node whose leaf value is 0, so a
// pattern ending further along the chain is not counted by this variant.
int query(char *s1) {
    int state = root;
    int ans = 0;
    for (char *p = s1; *p != '\0'; ++p) {
        state = next[state][*p - beginCh];
        for (int node = state; node != root && leaf[node]; node = fail[node]) {
            ans += leaf[node];
            leaf[node] = 0;  // prevent counting the same node again
        }
    }
    return ans;
}
注意:上面那种写法只在当前节点是模式串结尾(leaf非0)时才沿fail跳转,速度快也能过题,但有一种情况匹配不出来:在 abcdf
里查找 [ bc , abcde ]
会找不到 bc(走到 abc 时该节点不是结尾,循环直接退出,没有跳到 bc)
。貌似OJ题目都没这种数据,但建议用下面这种写法:
while (son != root ) {
ans += leaf[son];//leaf[]初始为0时才能这样,不然就需要if判断
leaf[son] = 0;
son = fail[son];
}
水题列表
HDU 2222 Keywords Search 真裸题
给一堆串,再给一个长串,求有多少串在长串里出现
#include <cstdio>
#include <cmath>
#include <cstring>
#include <string>
#include <iostream>
#include <algorithm>
#include <queue>
using namespace std;
#define ll long long
#define clr( a , x ) memset ( a , x , sizeof (a) );
#define RE freopen("in.txt","r",stdin);
#define WE freopen("out.txt","w",stdout);
//http://acm.split.hdu.edu.cn/showproblem.php?pid=2222
// Capacity of the trie node pool. The author deliberately sized it below the
// "all characters distinct" worst case (which would need the extra *50 factor)
// to save memory.
// NOTE(review): nothing checks cnt against maxn, so a trie larger than this
// overflows the arrays.
const int maxn = 10000 * 26 * 50/50 + 5;
const int maxch = 26;      // alphabet size: 'a'..'z'
const char beginCh = 'a';  // character mapped to child index 0

// Array-based Aho-Corasick automaton that counts how many inserted patterns
// occur in a text.
// Usage: init(), insert() every pattern, build(), then query().
// Precondition: patterns contain only characters in [beginCh, beginCh + maxch).
struct tree
{
    int next[maxn][maxch];  // next[u][c]: child of u on c; -1 = missing until build() fills it in
    int leaf[maxn];         // number of patterns ending at the node; -1 once query() has consumed it
    int fail[maxn];         // failure link of each node
    int cnt, root;          // cnt: number of allocated nodes; root: index of the root node

    // Allocates a fresh node with all child slots set to -1 and returns its index.
    int newNode() {
        std::memset(next[cnt], -1, sizeof(next[cnt]));
        return cnt++;
    }

    // Resets the automaton to an empty trie containing only the root.
    void init() {
        cnt = 0;
        root = newNode();
        std::memset(leaf, 0, sizeof(leaf));
    }

    // Inserts pattern s1; inserting the same pattern twice counts it twice.
    void insert(const char *s1) {
        int cur = root;
        for (const char *p = s1; *p != '\0'; ++p) {
            const int slot = *p - beginCh;
            if (next[cur][slot] == -1) {
                next[cur][slot] = newNode();
            }
            cur = next[cur][slot];
        }
        leaf[cur]++;
    }

    // Builds the failure links breadth-first and replaces every missing child
    // slot with the failure node's transition. Must be called after the last
    // insert() and before query().
    void build() {
        std::queue<int> pending;
        fail[root] = root;
        for (int c = 0; c < maxch; ++c) {
            int &child = next[root][c];
            if (child == -1) {
                child = root;  // missing root child loops back to the root
            } else {
                fail[child] = root;  // depth-1 nodes always fail to the root
                pending.push(child);
            }
        }
        while (!pending.empty()) {
            const int cur = pending.front();
            pending.pop();
            for (int c = 0; c < maxch; ++c) {
                const int viaFail = next[fail[cur]][c];
                int &child = next[cur][c];
                if (child == -1) {
                    child = viaFail;
                } else {
                    fail[child] = viaFail;
                    pending.push(child);
                }
            }
        }
    }

    // Returns how many inserted patterns (with multiplicity) occur in s1.
    // Every pattern is counted at most once, and the call is destructive:
    // visited nodes are marked with leaf = -1, so a second query() does not
    // count them again.
    //
    // The fail chain is followed through nodes that do not end a pattern too;
    // stopping at the first such node would miss "bc" in "abcdf" when "abcde"
    // is also a pattern. The -1 marker keeps the total work linear: once a
    // node is marked, the rest of its fail chain has been visited as well.
    int query(const char *s1) {
        int cur = root;
        int ans = 0;
        for (const char *p = s1; *p != '\0'; ++p) {
            const int k = *p - beginCh;
            if (k < 0 || k >= maxch) {
                // No pattern contains this character: restart from the root
                // instead of indexing outside next[][].
                cur = root;
                continue;
            }
            cur = next[cur][k];
            for (int node = cur; node != root && leaf[node] != -1; node = fail[node]) {
                ans += leaf[node];
                leaf[node] = -1;  // consumed / visited
            }
        }
        return ans;
    }
} ac;
// HDU 2222 driver: for each test case reads n patterns and one text, then
// prints how many of the patterns occur in the text.
int main() {
    // RE
    // Static storage: a 1 MB automatic array can exhaust a small stack.
    static char s1[1000002];
    int t = 0;
    if (std::scanf("%d", &t) != 1) {
        return 0;  // no test-case count: nothing to do
    }
    while (t-- > 0) {
        int n = 0;
        if (std::scanf("%d", &n) != 1) {
            break;  // truncated input
        }
        ac.init();
        for (int i = 0; i < n; ++i) {
            // The field width keeps the read inside s1.
            if (std::scanf("%1000001s", s1) != 1) {
                s1[0] = '\0';
            }
            ac.insert(s1);
        }
        ac.build();
        if (std::scanf("%1000001s", s1) != 1) {
            s1[0] = '\0';
        }
        std::printf("%d\n", ac.query(s1));
    }
    return 0;
}
HDU 3065 病毒侵袭持续中
开个数组记录病毒出现次数, 输出即可。字符集不到100个,因为空格(32)起才是可见字符。
#include <cstdio>
#include <cmath>
#include <cstring>
#include <iostream>
#include <queue>
using namespace std;
#define ll long long
#define RE freopen("in.txt","r",stdin);
#define WE freopen("out.txt","w",stdout);
const int maxn = 1000 * 52 + 5;  // capacity of the trie node pool
const int maxch = 100;           // printable ASCII 32..126 maps to 0..94, which fits in 100
const char beginCh = ' ';        // first printable character, mapped to child index 0
char word[1002][52];             // word[i]: text of pattern i (1-based)
int times[1002];                 // times[i]: occurrences of pattern i found by query()

// Array-based Aho-Corasick automaton that counts every occurrence of each
// pattern in a text.
// Usage: init(), insert() every pattern with its 1-based id, build(), query().
// Precondition: patterns contain only characters in [beginCh, beginCh + maxch).
struct tree
{
    int next[maxn][maxch];  // next[u][c]: child of u on c; -1 = missing until build() fills it in
    int leaf[maxn];         // id of the pattern ending at the node, 0 if none
    int cnt, root;          // cnt: number of allocated nodes; root: index of the root node
    int fail[maxn];         // failure link of each node
    int last[maxn];         // nearest node on the fail chain that ends a pattern (root if none)

    // Allocates a fresh node with all child slots set to -1 and returns its index.
    int newNode() {
        std::memset(next[cnt], -1, sizeof(next[cnt]));
        return cnt++;
    }

    // Resets the automaton to an empty trie containing only the root.
    void init() {
        cnt = 0;
        root = newNode();
        std::memset(leaf, 0, sizeof(leaf));
    }

    // Inserts pattern s1 and tags its final node with id (non-zero).
    // Inserting the same pattern again overwrites the stored id.
    void insert(const char *s1, int id) {
        int cur = root;
        for (const char *p = s1; *p != '\0'; ++p) {
            const int slot = *p - beginCh;
            if (next[cur][slot] == -1) {
                next[cur][slot] = newNode();
            }
            cur = next[cur][slot];
        }
        leaf[cur] = id;
    }

    // Builds failure links and output links breadth-first and replaces every
    // missing child slot with the failure node's transition. Must be called
    // after the last insert() and before query().
    void build() {
        std::queue<int> pending;
        fail[root] = root;
        last[root] = root;
        for (int c = 0; c < maxch; ++c) {
            int &child = next[root][c];
            if (child == -1) {
                child = root;  // missing root child loops back to the root
            } else {
                fail[child] = root;
                last[child] = root;
                pending.push(child);
            }
        }
        while (!pending.empty()) {
            const int cur = pending.front();
            pending.pop();
            for (int c = 0; c < maxch; ++c) {
                const int viaFail = next[fail[cur]][c];
                int &child = next[cur][c];
                if (child == -1) {
                    child = viaFail;
                } else {
                    fail[child] = viaFail;
                    // Skip over fail-chain nodes that end no pattern.
                    last[child] = leaf[viaFail] ? viaFail : last[viaFail];
                    pending.push(child);
                }
            }
        }
    }

    // Scans s1 and increments times[id] for every occurrence of every pattern.
    //
    // Matches are enumerated through the output links, so a pattern that is a
    // suffix of a node which ends no pattern itself is still found (e.g. "BC"
    // inside "ABCDF" when "ABCDE" is also a pattern). Stopping the fail walk
    // at the first such node would miss it.
    void query(const char *s1) {
        int cur = root;
        for (const char *p = s1; *p != '\0'; ++p) {
            const int k = *p - beginCh;
            if (k < 0 || k >= maxch) {
                // No pattern contains this character: restart from the root
                // instead of indexing outside next[][].
                cur = root;
                continue;
            }
            cur = next[cur][k];
            for (int node = leaf[cur] ? cur : last[cur]; node != root; node = last[node]) {
                times[leaf[node]]++;
            }
        }
    }
} ac;
// Line buffer shared by pattern and text input.
char s1[2000005];

// Reads one line from stdin into buf (capacity cap, terminator included) and
// strips the trailing "\n" or "\r\n". Returns false, leaving buf empty, when
// no more input is available. A line longer than cap - 1 characters is split.
static bool readLine(char *buf, int cap) {
    if (std::fgets(buf, cap, stdin) == NULL) {
        buf[0] = '\0';
        return false;
    }
    std::size_t len = std::strlen(buf);
    while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r')) {
        buf[--len] = '\0';
    }
    return true;
}

// Discards the rest of the current input line, including its newline.
static void skipLine() {
    int ch;
    while ((ch = std::getchar()) != EOF && ch != '\n') {
    }
}

// HDU 3065 driver: for each test case reads n patterns and one text, then
// prints "<pattern>: <count>" for every pattern that occurs at least once.
int main() {
    // RE
    // Largest pattern id that fits in word[] (ids are 1-based).
    const int maxWords = static_cast<int>(sizeof(word) / sizeof(word[0])) - 1;
    int n;
    while (std::scanf("%d", &n) == 1) {
        skipLine();
        if (n > maxWords) {
            break;  // would overflow word[] / times[]
        }
        ac.init();
        std::memset(times, 0, sizeof(times));
        for (int i = 1; i <= n; ++i) {
            // Bounded read replacing gets(); the copy is truncated to fit word[i].
            readLine(s1, static_cast<int>(sizeof(s1)));
            std::strncpy(word[i], s1, sizeof(word[i]) - 1);
            word[i][sizeof(word[i]) - 1] = '\0';
            ac.insert(word[i], i);
        }
        ac.build();
        readLine(s1, static_cast<int>(sizeof(s1)));
        ac.query(s1);
        for (int i = 1; i <= n; ++i) {
            if (times[i]) {
                std::printf("%s: %d\n", word[i], times[i]);
            }
        }
    }
    return 0;
}
HDU 2896 病毒侵袭
每个网站匹配到的病毒编号要按升序输出,所以不能在查询过程中一边匹配一边输出,例如下面的例子:
3
ccc
bbb
aaa
2
aaabbbcccccc
bbaacc
输出
web 1: 1 2 3
total: 1
#include <cstdio>
#include <cmath>
#include <cstring>
#include <string>
#include <iostream>
#include <algorithm>
#include <queue>
using namespace std;
#define ll long long
#define clr( a , x ) memset ( a , x , sizeof (a) );
#define RE freopen("in.txt","r",stdin);
#define WE freopen("out.txt","w",stdout);
const int maxn = 500 * 200 + 5;  // capacity of the trie node pool
const int maxch = 100;           // printable ASCII 32..126 maps to 0..94, which fits in 100
const char beginCh = ' ';        // first printable character, mapped to child index 0
bool vis[502];                   // vis[id]: pattern id was found by the most recent query()

// Array-based Aho-Corasick automaton that reports which patterns occur in a
// text.
// Usage: init(), insert() every pattern with its 1-based id, build(), then
// clrVis() + query() per text.
// Precondition: patterns contain only characters in [beginCh, beginCh + maxch).
struct tree
{
    int next[maxn][maxch];  // next[u][c]: child of u on c; -1 = missing until build() fills it in
    int leaf[maxn];         // id of the pattern ending at the node, 0 if none
    int cnt, root;          // cnt: number of allocated nodes; root: index of the root node
    int fail[maxn];         // failure link of each node
    int last[maxn];         // nearest node on the fail chain that ends a pattern (root if none)

    // Allocates a fresh node with all child slots set to -1 and returns its index.
    int newNode() {
        std::memset(next[cnt], -1, sizeof(next[cnt]));
        return cnt++;
    }

    // Resets the automaton to an empty trie containing only the root.
    void init() {
        cnt = 0;
        root = newNode();
        std::memset(leaf, 0, sizeof(leaf));
    }

    // Clears the per-text "pattern seen" flags.
    void clrVis() {
        std::memset(vis, 0, sizeof(vis));
    }

    // Inserts pattern s1 and tags its final node with id (non-zero).
    // Inserting the same pattern again overwrites the stored id.
    void insert(const char *s1, int id) {
        int cur = root;
        for (const char *p = s1; *p != '\0'; ++p) {
            const int slot = *p - beginCh;
            if (next[cur][slot] == -1) {
                next[cur][slot] = newNode();
            }
            cur = next[cur][slot];
        }
        leaf[cur] = id;
    }

    // Builds failure links and output links breadth-first and replaces every
    // missing child slot with the failure node's transition. Must be called
    // after the last insert() and before query().
    void build() {
        std::queue<int> pending;
        fail[root] = root;
        last[root] = root;
        for (int c = 0; c < maxch; ++c) {
            int &child = next[root][c];
            if (child == -1) {
                child = root;  // missing root child loops back to the root
            } else {
                fail[child] = root;
                last[child] = root;
                pending.push(child);
            }
        }
        while (!pending.empty()) {
            const int cur = pending.front();
            pending.pop();
            for (int c = 0; c < maxch; ++c) {
                const int viaFail = next[fail[cur]][c];
                int &child = next[cur][c];
                if (child == -1) {
                    child = viaFail;
                } else {
                    fail[child] = viaFail;
                    // Skip over fail-chain nodes that end no pattern.
                    last[child] = leaf[viaFail] ? viaFail : last[viaFail];
                    pending.push(child);
                }
            }
        }
    }

    // Scans s1, sets vis[id] for every pattern that occurs in it, and returns
    // 1 if at least one pattern was found, 0 otherwise. The id parameter is
    // not used; it is kept so existing callers still compile.
    //
    // Matches are enumerated through the output links, so a pattern that is a
    // suffix of a node which ends no pattern itself is still found. Stopping
    // the fail walk at the first such node would miss it.
    int query(const char *s1, int id) {
        (void)id;
        int cur = root;
        int has = 0;
        for (const char *p = s1; *p != '\0'; ++p) {
            const int k = *p - beginCh;
            if (k < 0 || k >= maxch) {
                // No pattern contains this character: restart from the root
                // instead of indexing outside next[][].
                cur = root;
                continue;
            }
            cur = next[cur][k];
            for (int node = leaf[cur] ? cur : last[cur]; node != root; node = last[node]) {
                has = 1;
                vis[leaf[node]] = true;
            }
        }
        return has;
    }
} ac;
// Reads one line from stdin into buf (capacity cap, terminator included) and
// strips the trailing "\n" or "\r\n". Returns false, leaving buf empty, when
// no more input is available. A line longer than cap - 1 characters is split.
static bool readLine(char *buf, int cap) {
    if (std::fgets(buf, cap, stdin) == NULL) {
        buf[0] = '\0';
        return false;
    }
    std::size_t len = std::strlen(buf);
    while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r')) {
        buf[--len] = '\0';
    }
    return true;
}

// Discards the rest of the current input line, including its newline.
static void skipLine() {
    int ch;
    while ((ch = std::getchar()) != EOF && ch != '\n') {
    }
}

// HDU 2896 driver: reads the patterns, then for every text prints
// "web <i>: <ids in ascending order>" if it contains any pattern, and finally
// the number of texts that contained at least one pattern.
int main() {
    // RE
    // Largest pattern id that fits in vis[] (ids are 1-based).
    const int maxId = static_cast<int>(sizeof(vis) / sizeof(vis[0])) - 1;
    int idMax = 0, n = 0;
    char s1[10005];
    ac.init();
    if (std::scanf("%d", &idMax) != 1) {
        idMax = 0;  // no pattern count: behave as if there were no patterns
    }
    skipLine();
    if (idMax > maxId) {
        return 0;  // would overflow vis[]
    }
    for (int i = 1; i <= idMax; ++i) {
        // Bounded read replacing gets().
        readLine(s1, static_cast<int>(sizeof(s1)));
        ac.insert(s1, i);
    }
    ac.build();
    if (std::scanf("%d", &n) != 1) {
        n = 0;
    }
    skipLine();
    int cnt = 0;
    for (int i = 1; i <= n; ++i) {
        readLine(s1, static_cast<int>(sizeof(s1)));
        ac.clrVis();
        cnt += ac.query(s1, i);
        // Ids are printed by scanning vis[] in order, which gives ascending output.
        bool first = true;
        for (int id = 1; id <= idMax; ++id) {
            if (!vis[id]) {
                continue;
            }
            if (first) {
                std::printf("web %d: %d", i, id);
                first = false;
            } else {
                std::printf(" %d", id);
            }
        }
        if (!first) std::printf("\n");
    }
    std::printf("total: %d\n", cnt);
    return 0;
}