1+ from collections import deque , defaultdict
2+
3+ class TrieNode :
4+ def __init__ (self ):
5+ self .children = {}
6+ self .failure = None
7+ self .output = []
8+ self .is_end_of_word = False
9+
10+ class AhoCorasickMatcher :
11+ def __init__ (self ):
12+ self .root = TrieNode ()
13+
14+ def build_trie (self , patterns ):
15+ """Build the trie structure with all patterns"""
16+ for pattern in patterns :
17+ if not pattern : # Skip empty patterns
18+ continue
19+ current = self .root
20+ for char in pattern :
21+ if char not in current .children :
22+ current .children [char ] = TrieNode ()
23+ current = current .children [char ]
24+ current .is_end_of_word = True
25+ current .output .append (pattern )
26+
27+ def build_failure_function (self ):
28+ """Build the failure function using BFS"""
29+ queue = deque ()
30+
31+ # Initialize failure function for depth 1 nodes
32+ for child in self .root .children .values ():
33+ child .failure = self .root
34+ queue .append (child )
35+
36+ # Build failure function for remaining nodes
37+ while queue :
38+ current = queue .popleft ()
39+
40+ for char , child in current .children .items ():
41+ queue .append (child )
42+
43+ # Find the failure link
44+ failure = current .failure
45+ while failure is not None and char not in failure .children :
46+ failure = failure .failure
47+
48+ if failure is not None :
49+ child .failure = failure .children [char ]
50+ else :
51+ child .failure = self .root
52+
53+ # Add output patterns from failure node
54+ child .output .extend (child .failure .output )
55+
56+ def search_patterns (self , text , patterns , case_sensitive = False ):
57+ """
58+ Search for multiple patterns simultaneously in text.
59+ Returns a dictionary mapping each pattern to list of starting positions.
60+ """
61+ if not patterns or not text :
62+ return {pattern : [] for pattern in patterns }
63+
64+ # Convert to lowercase if case insensitive
65+ if not case_sensitive :
66+ text = text .lower ()
67+ patterns = [pattern .lower () for pattern in patterns ]
68+
69+ # Build the automaton
70+ self .build_trie (patterns )
71+ self .build_failure_function ()
72+
73+ # Initialize results dictionary
74+ results = {pattern : [] for pattern in patterns }
75+
76+ # Search through the text
77+ current = self .root
78+
79+ for i , char in enumerate (text ):
80+ # Follow failure links until we find a match or reach root
81+ while current is not None and char not in current .children :
82+ current = current .failure
83+
84+ if current is None :
85+ current = self .root
86+ continue
87+
88+ current = current .children [char ]
89+
90+ # Check for pattern matches
91+ for pattern in current .output :
92+ start_pos = i - len (pattern ) + 1
93+ results [pattern ].append (start_pos )
94+
95+ return results
96+
97+ def search_pattern (self , text , pattern , case_sensitive = False ):
98+ """
99+ Search for a single pattern in text.
100+ Returns a list of starting positions (compatible with KMP/BM interface).
101+ """
102+ if not pattern or not text :
103+ return []
104+
105+ result = self .search_patterns (text , [pattern ], case_sensitive )
106+ return result .get (pattern , [])
107+
108+ def count_patterns (self , text , patterns , case_sensitive = False ):
109+ results = self .search_patterns (text , patterns , case_sensitive )
110+ return {pattern : len (positions ) for pattern , positions in results .items ()}
111+
112+ def find_first_occurrence (self , text , patterns , case_sensitive = False ):
113+ if not patterns or not text :
114+ return None , - 1
115+
116+ # Convert to lowercase if case insensitive
117+ if not case_sensitive :
118+ text = text .lower ()
119+ patterns = [pattern .lower () for pattern in patterns ]
120+
121+ # Build the automaton
122+ self .build_trie (patterns )
123+ self .build_failure_function ()
124+
125+ # Search for first match
126+ current = self .root
127+
128+ for i , char in enumerate (text ):
129+ # Follow failure links until we find a match or reach root
130+ while current is not None and char not in current .children :
131+ current = current .failure
132+
133+ if current is None :
134+ current = self .root
135+ continue
136+
137+ current = current .children [char ]
138+
139+ # Check for pattern matches
140+ if current .output :
141+ pattern = current .output [0 ] # Return first pattern found
142+ start_pos = i - len (pattern ) + 1
143+ return pattern , start_pos
144+
145+ return None , - 1
0 commit comments