Chapter 110
HomeKMP matches one pattern. Aho-Corasick matches all K patterns in O(N + total pattern length) — a trie with failure links (like KMP's LPS). Essential for malware signatures, bad-word filtering, and bioinformatics.
struct AhoCorasick {
static const int ALPH = 26;
struct Node {
int next[ALPH], fail;
int output; // number of patterns ending here
Node() : fail(0), output(0) {
memset(next, 0, sizeof(next));
}
};
vector trie;
AhoCorasick() { trie.emplace_back(); }
void insert(string& s) {
int v = 0;
for (char c : s) {
int idx = c - 'a';
if (!trie[v].next[idx]) {
trie[v].next[idx] = trie.size();
trie.emplace_back();
}
v = trie[v].next[idx];
}
trie[v].output++;
}
void build() { // BFS failure links
queue q;
for (int c = 0; c < ALPH; c++)
if (trie[0].next[c]) q.push(trie[0].next[c]);
while (!q.empty()) {
int v = q.front(); q.pop();
for (int c = 0; c < ALPH; c++) {
int u = trie[v].next[c];
if (u) {
trie[u].fail = trie[trie[v].fail].next[c];
trie[u].output += trie[trie[u].fail].output;
q.push(u);
} else {
trie[v].next[c] = trie[trie[v].fail].next[c];
}
}
}
}
int count_matches(string& text) {
int v = 0, ans = 0;
for (char c : text) {
v = trie[v].next[c - 'a'];
ans += trie[v].output;
}
return ans;
}
};
// Build: O(total pattern length). Query: O(text length).