Trie树,即字典树,是一种树形结构,是一种哈希树的变种。主要用来统计和排序大量的字符串,所以经常被搜索引擎系统用于文本词频统计。它的优点是:最大限度地减少无谓的字符串比较。
Trie的核心思想是空间换时间。利用字符串的公共前缀来降低查询时间的开销以达到提高效率的目的。
这里提供前缀树的封装接口:增加,删除,查询有无指定字符串及模糊查询。
#include <iostream> #include <cstring> #include <string> #include <vector> using namespace std; const int SIZE = 26; struct TrieNode { bool isEnd = false; int count = 0; string word = ""; struct TrieNode* children[SIZE]; }; class Trie { public: Trie(); ~Trie(); void destroyNode(TrieNode *& node);// only pass TrieNode* cannot make node = nullptr void addWord(const std::string& word); void removeWord(const std::string& word); bool hasWord(const std::string& word) const; void getWords(TrieNode *node, vector<std::string> &vecStr) const; std::vector<std::string> emumerateWords(const std::string& prefix) const; private: TrieNode *root = nullptr; TrieNode *createTrieNode(); }; Trie::Trie(){ root = new TrieNode(); } Trie::~Trie(){ destroyNode(root); } void Trie::destroyNode(TrieNode *& node){ if(node == nullptr) return; for(int i = 0;i < SIZE; i++){ destroyNode(node->children[i]); } delete node; node = nullptr;// if use TrieNode* this code cannot work well } TrieNode* Trie::createTrieNode(){ TrieNode *pNode = new TrieNode(); for(int i=0; i < SIZE; ++i) pNode->children[i] = nullptr; return pNode; } void Trie::addWord(const std::string& word){ TrieNode *node = root; int index = 0; for(auto ch : word){ index = ch - 'a'; if(node->children[index] == nullptr){ node->children[index] = createTrieNode(); } node = node->children[index]; node->count ++; } node->isEnd = true; node->word = word; } void Trie::removeWord(const std::string& word){ if(!hasWord(word)){ std::cout << word <<" not in data set, please check" << std::endl; return; } TrieNode *node = root; int index = 0; for(auto ch : word){ index = ch - 'a'; node = node->children[index]; if(--node->count == 0){ destroyNode(node); return; } } node->isEnd = false; } bool Trie::hasWord(const std::string& word) const{ TrieNode *node = root; int index = 0; for(auto ch : word){ index = ch - 'a'; if(node->children[index] != nullptr) node = node->children[index]; else return false; } return node->isEnd; } void Trie::getWords(TrieNode* node, vector<string> &vecStr) const{ if(node == nullptr) return ; if(node->isEnd) vecStr.push_back(node->word); for(int i = 0; i < SIZE; i++){ getWords(node->children[i],vecStr); } } std::vector<std::string> Trie::emumerateWords(const std::string& prefix) const{ TrieNode *node = root; std::vector<std::string> res; int index = 0; for(auto ch : prefix){ index = ch - 'a'; if(node->children[index] != nullptr) node = node->children[index]; else return res; } getWords(node,res); return res; }
其中最重要,也是最容易发生错误的接口是删除操作,如操作不当容易内存泄漏。查找了很多文章,析构函数均写得有问题,这里提供完整的功能接口。笔试题可能会遇到。