# Python implementation of Aho-Corasick string matching
from collections import deque

# Sentinel historically used for "no transition"; retained so that any code
# importing it from this module keeps working.
FAIL = -1


def aho_corasick(string, keywords):
    """Find every occurrence of every keyword in ``string``.

    A trie is built from ``keywords``, failure links are added with a
    breadth-first traversal, and ``string`` is then scanned once.

    Args:
        string: The text to search (iterated item by item).
        keywords: Iterable of patterns to look for. Empty patterns are
            ignored; a pattern listed more than once is reported once
            per occurrence.

    Returns:
        A list of ``(pos, keyword)`` tuples, where ``pos`` is the index in
        ``string`` at which the occurrence of ``keyword`` starts. Entries
        are ordered by the index at which the match ends; matches ending
        at the same index are ordered from longest to shortest.
    """
    # goto[s] maps a character to the child of state s in the keyword trie.
    goto = [{}]
    # outputs[s] holds the keywords recognised on reaching state s.
    outputs = [[]]

    # Phase 1: insert each keyword into the trie (state 0 is the root).
    for keyword in keywords:
        if not keyword:
            # An empty pattern has no final character to anchor a match on.
            continue
        state = 0
        for char in keyword:
            nxt = goto[state].get(char)
            if nxt is None:
                nxt = len(goto)
                goto[state][char] = nxt
                goto.append({})
                outputs.append([])
            state = nxt
        # Assign rather than append so a repeated keyword is stored once.
        outputs[state] = [keyword]

    # Phase 2: breadth-first computation of failure links. Depth-1 states
    # fall back to the root, which is the initial value of every entry.
    fails = [0] * len(goto)
    queue = deque(goto[0].values())
    while queue:
        parent = queue.popleft()
        for char, child in goto[parent].items():
            queue.append(child)
            # Follow the parent's failure chain until some state can
            # consume `char`, or the root is reached.
            state = fails[parent]
            while state and char not in goto[state]:
                state = fails[state]
            failure = goto[state].get(char, 0)
            fails[child] = failure
            # The failure target is shallower, so its output list is
            # already complete. Build a new list to avoid aliasing it.
            outputs[child] = outputs[child] + outputs[failure]

    # Phase 3: run the automaton over the text.
    results = []
    state = 0
    for i, char in enumerate(string):
        while state and char not in goto[state]:
            state = fails[state]
        # The root loops to itself on characters it has no edge for.
        state = goto[state].get(char, 0)
        for match in outputs[state]:
            results.append((i - len(match) + 1, match))

    return results