基本结构
字典树(Trie树)是一种由“结点”和“带有字符的边”构成的树形结构。
典型应用是用于统计和排序大量的字符串(但不仅限于字符串),经常被搜索引擎系统用于文本词频统计。
它的优点是:最大限度地减少无谓的字符串比较,查询效率比哈希表高。
基本性质
- 结点本身不保存完整单词
- 从根结点到某一结点,路径上经过的字符连接起来,为该结点对应的单词
- 每个结点出发的所有边代表的字符都不相同
- 结点用于存储单词的额外信息(例如频次)
内部实现
字符集数组法(简单)
每个结点保存一个长度固定为字符集大小(例如26)的数组,以字符为下标,保存指向的结点空间复杂度为O(结点数*字符集大小),查询的时间复杂度为O(单词长度)
适用于较小字符集,或者单词短、分布稠密的字典
字符集映射法(优化)
把每个结点上的字符集数组改为一个映射(词频统计: hash map,排序: ordered map)空间复杂度为O(文本字符总数),查询的时间复杂度为O(单词长度),但常数稍大一些适用性更广
核心思想
Trie树的核心思想是空间换时间
无论是保存树的结构、字符集数组还是字符集映射,都需要额外的空间
利用字符串的公共前缀来降低查询时间的开销以达到提高效率的目的
分组思想——前缀相同的字符串在同一子树中
代码实现
class Trie { public: Trie() { root = new Node(); } void insert(string word) { find(word, true, true); } bool search(string word) { return find(word, true, false); } bool startsWith(string prefix) { return find(prefix, false, false); } private: struct Node { int count; unordered_map<char, Node *> child; Node() : count(0) {} }; Node *root; bool find(const string &s, bool exact_match, bool insert_if_not_exist) { Node *curr = root; for (char c : s) { if (curr->child.find(c) == curr->child.end()) { if (!insert_if_not_exist) return false; curr->child[c] = new Node(); } curr = curr->child[c]; } if (insert_if_not_exist) curr->count++; return exact_match ? curr->count > 0 : true; } };
实战
208.实现Trie(前缀树)
https://leetcode.cn/problems/implement-trie-prefix-tree/
class Trie { public: Trie() { root = new Node(); } void insert(string word) { find(word, true, true); } bool search(string word) { return find(word, true, false); } bool startsWith(string prefix) { return find(prefix, false, false); } private: struct Node { int count; unordered_map<char, Node *> child; Node() : count(0) {} }; Node *root; bool find(const string &s, bool exact_match, bool insert_if_not_exist) { Node *curr = root; for (char c : s) { if (curr->child.find(c) == curr->child.end()) { if (!insert_if_not_exist) return false; curr->child[c] = new Node(); } curr = curr->child[c]; } if (insert_if_not_exist) curr->count++; return exact_match ? curr->count > 0 : true; } };
class Trie { private: bool isEnd; Trie* next[26]; public: Trie() { isEnd = false; memset(next, 0, sizeof(next)); } void insert(string word) { Trie* node = this; for (char c : word) { if (node->next[c-'a'] == NULL) { node->next[c-'a'] = new Trie(); } node = node->next[c-'a']; } node->isEnd = true; } bool search(string word) { Trie* node = this; for (char c : word) { node = node->next[c - 'a']; if (node == NULL) { return false; } } return node->isEnd; } bool startsWith(string prefix) { Trie* node = this; for (char c : prefix) { node = node->next[c-'a']; if (node == NULL) { return false; } } return true; } };
212.单词搜索||
https://leetcode.cn/problems/word-search-ii/
class Solution { public: vector<string> findWords(vector<vector<char>>& board, vector<string>& words) { root = new Node(); for(const string& word : words) { insert(word); } m = board.size(); n = board[0].size(); visit = vector<vector<bool>>(m, vector<bool>(n, false)); for (int i = 0;i < m; i++ ){ for (int j = 0; j < n; j++){ visit[i][j] = true; dfs(board, i, j, root); visit[i][j] = false; } } return vector<string>(ans.begin(), ans.end()); } private: const int dx[4] = {-1, 0, 0, 1}; const int dy[4] = {0, -1, 1, 0}; struct Node { int count; unordered_map<char, Node*> child; Node() : count(0) {} }; Node* root; int m, n; vector<vector<bool>> visit; string str; unordered_set<string> ans; void dfs(vector<vector<char>>& board, int x, int y, Node* curr) { if (curr == nullptr) return; char ch = board[x][y]; if (curr->child.find(ch) == curr->child.end()) return; Node* next = curr->child[ch]; str.push_back(ch); if (next->count > 0) ans.insert(str); if(next->child.empty()) { curr->child.erase(ch); delete next; }else { for (int i = 0; i < 4; i++) { int nx = x + dx[i]; int ny = y + dy[i]; if (nx < 0 || ny < 0 || nx >= m || ny >= n) continue; if (visit[nx][ny]) continue; visit[nx][ny] = true; dfs(board ,nx ,ny ,next); visit[nx][ny] = false; } } str.pop_back(); } void insert(const string& s) { Node* curr = root; for (char c : s) { if (curr->child.find(c) == curr->child.end()) { curr->child[c] = new Node(); } curr = curr->child[c]; } curr->count++; } };
推荐一个零声学院免费公开课程,个人觉得老师讲得不错,分享给大家:Linux,Nginx,ZeroMQ,MySQL,Redis,fastdfs,MongoDB,ZK,流媒体,CDN,P2P,K8S,Docker,TCP/IP,协程,DPDK等技术内容,立即学习