Skip to content

Commit

Permalink
Merge pull request #436 from manish2202/master
Browse files Browse the repository at this point in the history
Aho-Corasick algorithm implementation in Python
  • Loading branch information
geekquad authored Dec 16, 2020
2 parents b7458ea + d9be2e1 commit 2efc894
Show file tree
Hide file tree
Showing 5 changed files with 607 additions and 0 deletions.
65 changes: 65 additions & 0 deletions Aho-Corasick Algorithm/aho_corasick1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Python implementation of Aho-Corasick string matching
from collections import deque

# Sentinel returned by the goto function when a state has no edge for a char.
FAIL = -1


def aho_corasick(string, keywords):
    """Find every occurrence of every keyword in ``string``.

    Builds an Aho-Corasick automaton (trie + failure links) from
    ``keywords`` and runs ``string`` through it once.

    Args:
        string: The text to search.
        keywords: Iterable of patterns to look for.  Empty patterns are
            ignored; duplicate patterns are reported once per occurrence.

    Returns:
        A list of ``(start_index, keyword)`` tuples, ordered by the index
        at which each match ends.  Matches ending at the same index are
        listed longest first.

    >>> aho_corasick("ushers", ["he", "she", "his", "hers"])
    [(1, 'she'), (2, 'he'), (2, 'hers')]
    """
    transitions = {}  # (state, char) -> next state (trie edges)
    children = {}     # state -> [(char, child state), ...] in insertion order
    outputs = {}      # state -> keywords recognised on entering that state
    fails = {}        # state -> state to fall back to on a mismatch

    def goto(state, char):
        # The root (state 0) loops back to itself on any unknown char, so
        # failure chains always terminate there; other states report FAIL.
        return transitions.get((state, char), 0 if state == 0 else FAIL)

    # Phase 1: build the trie.
    new_state = 0
    for keyword in keywords:
        if not keyword:
            # An empty pattern has no final character to anchor a match on.
            continue
        state = 0
        for char in keyword:
            nxt = transitions.get((state, char), FAIL)
            if nxt == FAIL:
                # Only create an edge when one is really missing; a keyword
                # that is a prefix of an earlier one reuses existing states.
                new_state += 1
                nxt = new_state
                transitions[(state, char)] = nxt
                children.setdefault(state, []).append((char, nxt))
            state = nxt
        outputs[state] = [keyword]

    # Phase 2: breadth-first computation of failure links.  Depth-1 states
    # fall back to the root; deeper states are derived from their parent's
    # (already computed) failure link.
    queue = deque()
    for _, child in children.get(0, ()):
        fails[child] = 0
        queue.append(child)

    while queue:
        parent = queue.popleft()
        for char, child in children.get(parent, ()):
            queue.append(child)
            state = fails[parent]
            failure = goto(state, char)
            while failure == FAIL:
                state = fails[state]
                failure = goto(state, char)
            fails[child] = failure
            # Inherit the matches of the longest proper suffix state, which
            # is shallower and therefore already complete.
            outputs.setdefault(child, []).extend(outputs.get(failure, ()))

    # Phase 3: scan the text.
    state = 0
    results = []
    for i, char in enumerate(string):
        nxt = goto(state, char)
        while nxt == FAIL:
            state = fails[state]
            nxt = goto(state, char)
        state = nxt

        for match in outputs.get(state, ()):
            results.append((i - len(match) + 1, match))

    return results
192 changes: 192 additions & 0 deletions Aho-Corasick Algorithm/requirements
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
alabaster==0.7.12
asn1crypto==0.24.0
astroid==2.2.5
astropy==3.2.1
attrs==19.1.0
Babel==2.7.0
backcall==0.1.0
backports.functools-lru-cache==1.5
backports.os==0.1.1
backports.shutil-get-terminal-size==1.0.0
backports.tempfile==1.0
bitarray==0.9.3
bkcharts==0.2
bleach==3.1.0
bokeh==1.2.0
boto==2.49.0
Bottleneck==1.2.1
certifi==2019.6.16
cffi==1.12.3
chardet==3.0.4
Click==7.0
cloudpickle==1.2.1
clyent==1.2.2
colorama==0.4.1
comtypes==1.1.7
contextlib2==0.5.5
cycler==0.10.0
Cython==0.29.12
cytoolz==0.10.0
dask==2.1.0
decorator==4.4.0
defusedxml==0.6.0
distributed==2.1.0
docutils==0.14
entrypoints==0.3
et-xmlfile==1.0.1
fastcache==1.1.0
filelock==3.0.12
Flask==1.1.1
future==0.17.1
gevent==1.4.0
glob2==0.7
greenlet==0.4.15
h5py==2.9.0
heapdict==1.0.0
idna==2.8
imageio==2.5.0
imagesize==1.1.0
importlib-metadata==0.17
ipykernel==5.1.1
ipython==7.6.1
ipython-genutils==0.2.0
ipywidgets==7.5.0
isort==4.3.21
itsdangerous==1.1.0
jdcal==1.4.1
jedi==0.13.3
Jinja2==2.10.1
joblib==0.13.2
json5==0.8.4
jsonschema==3.0.1
jupyter==1.0.0
jupyter-client==5.3.1
jupyter-console==6.0.0
jupyter-core==4.5.0
jupyterlab==1.0.2
jupyterlab-server==1.0.0
keyring==18.0.0
kiwisolver==1.1.0
lazy-object-proxy==1.4.1
libarchive-c==2.8
llvmlite==0.29.0
locket==0.2.0
lxml==4.3.4
MarkupSafe==1.1.1
matplotlib==3.1.0
mccabe==0.6.1
menuinst==1.4.16
mistune==0.8.4
mkl-fft==1.0.12
mkl-random==1.0.2
mkl-service==2.0.2
mock==3.0.5
more-itertools==7.0.0
mpmath==1.1.0
msgpack==0.6.1
multipledispatch==0.6.0
navigator-updater==0.2.1
nbconvert==5.5.0
nbformat==4.4.0
networkx==2.3
nltk==3.4.4
nose==1.3.7
notebook==6.0.0
numba==0.44.1
numexpr==2.6.9
numpy==1.16.4
numpydoc==0.9.1
olefile==0.46
openpyxl==2.6.2
packaging==19.0
pandas==0.24.2
pandocfilters==1.4.2
parso==0.5.0
partd==1.0.0
path.py==12.0.1
pathlib2==2.3.4
patsy==0.5.1
pep8==1.7.1
pickleshare==0.7.5
Pillow==6.1.0
pkginfo==1.5.0.1
pluggy==0.12.0
ply==3.11
prometheus-client==0.7.1
prompt-toolkit==2.0.9
psutil==5.6.3
py==1.8.0
pycodestyle==2.5.0
pycosat==0.6.3
pycparser==2.19
pycrypto==2.6.1
pycurl==7.43.0.3
pyflakes==2.1.1
Pygments==2.4.2
pylint==2.3.1
pyodbc==4.0.26
pyOpenSSL==19.0.0
pyparsing==2.4.0
pyreadline==2.1
pyrsistent==0.14.11
PySocks==1.7.0
pytest==5.0.1
pytest-arraydiff==0.3
pytest-astropy==0.5.0
pytest-doctestplus==0.3.0
pytest-openfiles==0.3.2
pytest-remotedata==0.3.1
python-dateutil==2.8.0
pytz==2019.1
PyWavelets==1.0.3
pywin32==223
pywinpty==0.5.5
PyYAML==5.1.1
pyzmq==18.0.0
QtAwesome==0.5.7
qtconsole==4.5.1
QtPy==1.8.0
requests==2.22.0
rope==0.14.0
ruamel-yaml==0.15.46
scikit-image==0.15.0
scikit-learn==0.21.2
scipy==1.2.1
seaborn==0.9.0
Send2Trash==1.5.0
simplegeneric==0.8.1
singledispatch==3.4.0.3
six==1.12.0
snowballstemmer==1.9.0
sortedcollections==1.1.2
sortedcontainers==2.1.0
soupsieve==1.8
spyder-kernels==0.5.1
SQLAlchemy==1.3.5
statsmodels==0.10.0
stopwords==0.1.3
sympy==1.4
tables==3.5.2
tblib==1.4.0
terminado==0.8.2
testpath==0.4.2
toolz==0.10.0
tornado==6.0.3
tqdm==4.32.1
traitlets==4.3.2
unicodecsv==0.14.1
urllib3==1.24.2
wcwidth==0.1.7
webencodings==0.5.1
Werkzeug==0.15.4
widgetsnbextension==3.5.0
win-inet-pton==1.1.0
win-unicode-console==0.5
wincertstore==0.2
wrapt==1.11.2
xlrd==1.2.0
XlsxWriter==1.1.8
xlwings==0.15.8
xlwt==1.3.0
zict==1.0.0
zipp==0.5.1
Loading

0 comments on commit 2efc894

Please sign in to comment.