AC自动机 - 多模式串匹配问题的基本运用 + 模板题 --- HDU 2222

简介: Keywords Search Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 65536/32768 K (Java/Others)Total Submission(s): 35655    Accepted Submission(...

Keywords Search

Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 65536/32768 K (Java/Others)
Total Submission(s): 35655    Accepted Submission(s): 11496


Problem Description
In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image have a long description, when users type some keywords to find the image, the system will match the keywords with description of image and show the image which the most keywords be matched.
To simplify the problem, giving you a description of image, and some keywords, you should tell me how many keywords will be match.
 

 

Input
First line will contain one integer means how many cases will follow by.
Each case will contain two integers N means the number of keywords and N keywords follow. (N <= 10000)
Each keyword will only contains characters 'a'-'z', and the length will be not longer than 50.
The last line is the description, and the length will be not longer than 1000000.
 

 

Output
Print how many keywords are contained in the description.
 Sample Input
1
5
she
he
say
shr
her
yasherhs
Sample Output
3
 

 

Mean: 

 给你n个单词,再给你一篇文章,让你统计有多少个单词在文章中出现过。

analyse:

裸的AC自动机,模板题。

Time complexity:o(n)+o(ml)    n个模式串长度均不超过m,文本串长度为L

 

Source code:

 

// Memory   Time
// 1347K     0MS
// by : Snarl_jsb
// 2014-09-29-20.14
#include<algorithm>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<iostream>
#include<vector>
#include<queue>
#include<stack>
#include<map>
#include<string>
#include<climits>
#include<cmath>
#define N 10010
#define LL long long
using namespace std;

namespace ac_auto
{
    char str[1000005];
    struct node
    {
        node *next[26];
        node *fail;
        int count;
        node()
        {
            for(int i = 0; i < 26; i++)
                next[i] = NULL;
            count = 0;
            fail = NULL;
        }
    }*q[50*N];
    node *root;
    int head, tail;

    void Insert(char *str)      //      插入单词
    {
        node *p = root;
        int i = 0, index;
        while(str[i]) {
            index = str[i] - 'a';
            if(p->next[index] == NULL)
                p->next[index] = new node();
            p = p->next[index];
            i++;
        }
        p->count++;
    }
    void build_ac_automation(node *root)        //      bfs建立fail指针
    {
        root->fail = NULL;
        q[tail++] = root;
        while(head < tail) {
            node *temp = q[head++];
            node *p = NULL;
            for(int i = 0; i < 26; i++) {
                if(temp->next[i] != NULL) {
                    if(temp == root) temp->next[i]->fail = root;
                    else {
                        p = temp->fail;
                        while(p != NULL) {
                            if(p->next[i] != NULL) {
                                temp->next[i]->fail = p->next[i];
                                break;
                            }
                            p = p->fail;
                        }
                        if(p == NULL) temp->next[i]->fail = root;
                    }
                    q[tail++] = temp->next[i];
                }
            }
        }
    }
    int Query(node *root)       //  匹配 + 统计
    {
        int i = 0, cnt = 0, index;
        node *p = root;
        while(str[i]) {
            index = str[i] - 'a';
            while(p->next[index] == NULL && p != root) p = p->fail;
            p = p->next[index];
            if(p == NULL) p = root;
            node *temp = p;
            while(temp != root && temp->count != -1) {
                cnt += temp->count;
                temp->count = -1;
                temp = temp->fail;
            }
            i++;
        }
        return cnt;
    }
}
using namespace ac_auto;


int main()
{
    int T, n;
    scanf("%d",&T);
    while(T--)
    {
        head = tail = 0;    //  清零
        root = new node();      //  申请新的root结点
        scanf("%d",&n);
        while(n--)
        {
            scanf("%s", str);
            Insert(str);    //  插入单词
        }
        build_ac_automation(root);      //  建树
        scanf("%s",str);
        printf("%d\n", Query(root));        //      查找+统计
    }
    return 0;
}

 

附上注释版(博主很贴心的有木有=.=)

#include<cstdio>

const int N = 10010;
char str[1000005];
struct node
{
    node *next[26];     //  每个结点都对应26个字母的指针
    node *fail;     //      失配指针
    int count;      //
    node()      //  构造函数初始化
    {
        for(int i = 0; i < 26; i++)
            next[i] = NULL;
        count = 0;
        fail = NULL;
    }
}*q[50*N];
node *root;
int head, tail;

void Insert(char *str) //   插入单词.相当于构建一个Trie树
{
    node *p = root;
    int i = 0, index;
    while(str[i]) {
        index = str[i] - 'a'; //  转化为相对数字来存
        if(p->next[index] == NULL) // 该字母未插入过
            p->next[index] = new node();    //  为该字母申请一个结点
        p = p->next[index];     //   移至下一个
        i++;
    }
    p->count++;     //      记录该结点的单词总共插入的次数
}
void build_ac_automation(node *root)        //      bfs建立fail指针
{
    root->fail = NULL;
    q[tail++] = root;
    while(head < tail) {
        node *temp = q[head++];
        node *p = NULL;
        for(int i = 0; i < 26; i++) {
            if(temp->next[i] != NULL) {
                if(temp == root) temp->next[i]->fail = root;
                else {
                    p = temp->fail;
                    while(p != NULL) {
                        if(p->next[i] != NULL) {
                            temp->next[i]->fail = p->next[i];
                            break;
                        }
                        p = p->fail;
                    }
                    if(p == NULL) temp->next[i]->fail = root;
                }
                q[tail++] = temp->next[i];
            }
        }
    }
}

int Query(node *root)       //  匹配 + 统计
{
    int i = 0, cnt = 0, index;
    node *p = root;
    while(str[i])
    {
        index = str[i] - 'a';
        while(p->next[index] == NULL && p != root) //前缀是相同的,所以不管哪个指针走到了count不为0的结点上,那么该结点所代表的单词就匹配成功
            p = p->fail;//失配情况下,p指针指向p->fail.(相当于KMP的next数组)
        p = p->next[index];//由于现在所在的位置是父节点,所以需要向下移动一个位置
        if(p == NULL)
            p = root; //如果匹配失败,移动到root,重新开始匹配
        node *temp = p;//
        while(temp != root && temp->count != -1)  //统计--如果匹配成功,那么count>1,表示该结点代表的单词数量;否则表示该结点没有单词
        {
            cnt += temp->count; //统计该单词出现的次数
            temp->count = -1;   //标记为-1,表示该单词已经加入了cnt中,下次就不用重复统计
            temp = temp->fail;//判断整条链上的匹配情况
        }
        i++;
    }
    return cnt;
}

int main()
{
    int T, n;
    scanf("%d",&T);
    while(T--)
    {
        head = tail = 0;    //  清零
        root = new node();      //  申请新的root结点
        scanf("%d",&n);
        while(n--)
        {
            scanf("%s", str);
            Insert(str);    //  插入单词
        }
        build_ac_automation(root);      //  建树
        scanf("%s",str);
        printf("%d\n", Query(root));        //      查找+统计
    }
    return 0;
}

 后来交了一发G++才发现会超内存,毕竟每次Trie树申请的内存没释放,内存开销还是有点大。同样交C++就没事,说明C++的内存回收机制比G++要好很多。

但是自己写了个手动释放内存的函数也还是无济于事。后来又把BFS的静态队列改为了动态队列,同样也没多大作用。

/*
* this code is made by crazyacking
* Verdict: Accepted
* Submission Date: 2015-07-17-13.54
* Time: 0MS
* Memory: 137KB
*/
#include <queue>
#include <cstdio>
#include <set>
#include <string>
#include <stack>
#include <cmath>
#include <climits>
#include <map>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <algorithm>
#include <cstring>
#define  LL long long
#define  ULL unsigned long long
using namespace std;
const int N = 10010;
const int M = 10 + int( 1e6 );
class Node
{
public:
      int cnt;
      Node *next[26], *fail;
      Node()
      {
            cnt = 0;
            for( int i = 0; i < 26; ++i ) next[i] = NULL;
            fail = NULL;
      }
};
queue<Node*> q;
//Node *q[50 * N];
Node *root;
int head, tail;
char str[M];

void Insert( char *str ) // build Trie-Tree
{
      Node *p = root;
      int i = 0, index;
      while( str[i] )
      {
            index = str[i] - 'a';
            if( p->next[index] == NULL )
                  p->next[index] = new Node();
            p = p->next[index];
            ++i;
      }
      p->cnt++;
}

void build_ac_automation( Node *root ) // build fail ptr
{
      root->fail = NULL;
      while(!q.empty()) q.pop();
      q.push(root);
//      q[tail++] = root;
      while( !q.empty() )
      {
            Node *temp=q.front();
            q.pop();
//            Node *temp = q[head++];
            Node *p = NULL;
            for( int i = 0; i < 26; ++i )
            {
                  if( temp->next[i] != NULL )
                  {
                        if( temp == root ) temp->next[i]->fail = root;
                        else
                        {
                              p = temp->fail;
                              while( p != NULL )
                              {
                                    if( p->next[i] != NULL )
                                    {
                                          temp->next[i]->fail = p->next[i];
                                          break;
                                    }
                                    p = p->fail;
                              }
                              if( p == NULL ) temp->next[i]->fail = root;
                        }
//                        q[tail++] = temp->next[i];
                        q.push(temp->next[i]);
                  }
            }
      }
}


int Query( Node *root )  // mathing and count
{
      int i = 0, ans = 0, index;
      Node *p = root;
      while( str[i] )
      {
            index = str[i] - 'a';
            while( p->next[index] == NULL && p != root )
                  p = p->fail;
            p = p->next[index];
            if( p == NULL )
                  p = root;
            Node *temp = p;
            while( temp != root && temp->cnt != -1 )
            {
                  ans += temp->cnt;
                  temp->cnt = -1;
                  temp = temp->fail;
            }
            i++;
      }
      return ans;
}

/**< 开始交G++一直超内存,交C++就过了,看来C++回收内粗的机制做得不错。 */
/**< 手动释放内存也还会超,看来内存占用不在这儿,应该是BFS的静态队列 */
void free_malloc(Node *root)
{
      queue<Node*> Q;
      Q.push(root);
      while(!Q.empty())
      {
            Node* p=(Q.front());
            Q.pop();
            for(int i=0;i<26;++i)
            {
                  if(p->next[i]!=NULL)
                  {
                        Q.push((Node*) (p->next[i]));
                  }
            }
            delete(p);
      }
      root=NULL;
}


int main()
{
      ios_base::sync_with_stdio( false );
      cin.tie( 0 );
      int Cas;
      scanf( "%d", &Cas );
      while( Cas-- )
      {
            int n;
            head = tail = 0;
            root = new Node();
            scanf( "%d", &n );
            while( n-- )
            {
                  scanf( "%s", str );
                  Insert( str );
            }
            build_ac_automation( root );
            scanf( "%s", str );
            printf( "%d\n", Query( root ) );
            free_malloc(root);
      }
      return 0;
}
/*

*/

 C++类的成员函数会对函数内new申请的空间进行回收,但是写了类依然逃脱不了G++超时的诅咒。

/*
* this code is made by crazyacking
* Verdict: Accepted
* Submission Date: 2015-07-17-16.17
* Time: 0MS
* Memory: 137KB
*/
#include <queue>
#include <cstdio>
#include <set>
#include <string>
#include <stack>
#include <cmath>
#include <climits>
#include <map>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <algorithm>
#include <cstring>
#define  LL long long
#define  ULL unsigned long long
using namespace std;

char str[1000005];
struct node
{
      node *next[26];
      node *fail;
      int count;
      node()
      {
            for( int i = 0; i < 26; i++ )
                  next[i] = NULL;
            count = 0;
            fail = NULL;
      }
};
queue<node*> q;
node *root;


class AC_AUTO_CLASS
{
public:
      void Insert( char *str )    //      插入单词
      {
            node *p = root;
            int i = 0, index;
            while( str[i] )
            {
                  index = str[i] - 'a';
                  if( p->next[index] == NULL )
                        p->next[index] = new node();
                  p = p->next[index];
                  i++;
            }
            p->count++;
      }
};
void build_ac_automation( node *root )      //      bfs建立fail指针
{
      root->fail = NULL;
      while( !q.empty() ) q.pop();
      q.push( root );
      while( !q.empty() )
      {
            node *temp = q.front(); q.pop();
            node *p = NULL;
            for( int i = 0; i < 26; i++ )
            {
                  if( temp->next[i] != NULL )
                  {
                        if( temp == root ) temp->next[i]->fail = root;
                        else
                        {
                              p = temp->fail;
                              while( p != NULL )
                              {
                                    if( p->next[i] != NULL )
                                    {
                                          temp->next[i]->fail = p->next[i];
                                          break;
                                    }
                                    p = p->fail;
                              }
                              if( p == NULL ) temp->next[i]->fail = root;
                        }
                        q.push( temp->next[i] );
                  }
            }
      }
}


int Query( node *root )     //  匹配 + 统计
{
      int i = 0, cnt = 0, index;
      node *p = root;
      while( str[i] )
      {
            index = str[i] - 'a';
            while( p->next[index] == NULL && p != root ) p = p->fail;
            p = p->next[index];
            if( p == NULL ) p = root;
            node *temp = p;
            while( temp != root && temp->count != -1 )
            {
                  cnt += temp->count;
                  temp->count = -1;
                  temp = temp->fail;
            }
            i++;
      }
      return cnt;
}


int main()
{
      int T, n;
      scanf( "%d", &T );
      while( T-- )
      {
            root = new node();      //  申请新的root结点
            scanf( "%d", &n );
            {
                  AC_AUTO_CLASS ac_auto_class;
                  while( n-- )
                  {
                        scanf( "%s", str );
                        ac_auto_class.Insert( str );  //  插入单词
                  }
                  build_ac_automation( root );    //  建树
                  scanf( "%s", str );
                  printf( "%d\n", Query( root ) );    //      查找+统计
            }
      }
      return 0;
}

  

目录
相关文章
|
算法
递归题目练习---N皇后问题
递归题目练习---N皇后问题
109 0
递归题目练习---N皇后问题
递归题目练习---数独问题
递归题目练习---数独问题
110 0
递归题目练习---数独问题
|
机器学习/深度学习 算法
【递归与递推 4】AcWing95. 费解的开关 、AcWing 93. 递归实现组合型枚举、AcWing 1209. 带分数、AcWing 1208. 翻硬币
【递归与递推 4】AcWing95. 费解的开关 、AcWing 93. 递归实现组合型枚举、AcWing 1209. 带分数、AcWing 1208. 翻硬币
156 0
【CCCC】L3-008 喊山 (30分),BFS搜索最长路,水题
【CCCC】L3-008 喊山 (30分),BFS搜索最长路,水题
107 0
|
Java BI 机器学习/深度学习
|
Java
HDU 1711 Number Sequence(KMP裸题,板子题,有坑点)
Number Sequence Time Limit: 10000/5000 MS (Java/Others)    Memory Limit: 32768/32768 K (Java/Others)Total Submission(s): 27028    Accepted Submission...
1021 0
|
算法 人工智能
NYOJ 题目77 开灯问题(简单模拟)
开灯问题         时间限制:3000 ms  |            内存限制:65535 KB         难度:1                            描述        有n盏灯,编号为1~n,第1个人把所有灯打开,第2个人按下所有编号为2 的倍数的开关(这些灯将被关掉),第3 个人按下所有编号为3的倍数的开关(其中关掉的灯将被打开,开着的灯将被关闭),依此类推。
1309 0
模板题 + KMP + 求最小循环节 --- HDU 3746 Cyclic Nacklace
Cyclic Nacklace  Problem's Link:  http://acm.hdu.edu.cn/showproblem.php?pid=3746   Mean:  给你一个字符串,让你在后面加尽量少的字符,使得这个字符串成为一个重复串。
1230 0
搜索 --- 数独求解 POJ 2676 Sudoku
Sudoku Problem's Link:   http://poj.org/problem?id=2676   Mean:  略   analyse: 记录所有空位置,判断当前空位置是否可以填某个数,然后直接DFS,注意从后往前搜索,时间比正向搜快很多。
893 0