diff --git a/apps/col_data.py b/apps/col_data.py
index c883e474..5565dccf 100644
--- a/apps/col_data.py
+++ b/apps/col_data.py
@@ -1,5 +1,9 @@
+from urllib.parse import quote
+
 from elasticsearch import Elasticsearch
 from elasticsearch_dsl import Search
+import requests
+from bs4 import BeautifulSoup
 
 
 class Dictionary:
@@ -32,5 +36,42 @@ def save(self, word, data):
         self.es.index(index=self.es_index, id=word, body=data)
 
 
+class VietnameseWiktionary:
+    """Fetch and parse entry pages from Vietnamese Wiktionary.
+
+    Site: https://vi.wiktionary.org/
+    """
+
+    BASE_URL = "https://vi.wiktionary.org/wiki/"
+    # Seconds to wait for the server; without it requests applies no timeout.
+    TIMEOUT = 10
+
+    @staticmethod
+    def get(word):
+        """Download the Wiktionary page for ``word`` and return it parsed.
+
+        Args:
+            word: Headword to look up; spaces and non-ASCII text are allowed.
+
+        Returns:
+            The page HTML parsed into a ``BeautifulSoup`` object.
+
+        Raises:
+            ValueError: If ``word`` is empty.
+            requests.RequestException: If the request fails, times out,
+                or the server answers with an error status.
+        """
+        if not word:
+            raise ValueError("word must be a non-empty string")
+
+        # Page titles use underscores for spaces; percent-encode the rest so
+        # characters such as "?" or "#" cannot alter the URL structure.
+        title = quote(word.replace(" ", "_"), safe="")
+        url = VietnameseWiktionary.BASE_URL + title
+        r = requests.get(url, timeout=VietnameseWiktionary.TIMEOUT)
+        r.raise_for_status()
+        return BeautifulSoup(r.content, "html.parser")
+
+
 if __name__ == '__main__':
     dictionary = Dictionary(es_index='dictionary')
diff --git a/apps/col_dictionary_get_sense.py b/apps/col_dictionary_get_sense.py
new file mode 100644
index 00000000..e69de29b
diff --git a/apps/requirements.txt b/apps/requirements.txt
new file mode 100644
index 00000000..20bdb475
--- /dev/null
+++ b/apps/requirements.txt
@@ -0,0 +1 @@
+beautifulsoup4==4.9.3
\ No newline at end of file
diff --git a/apps/tests/test_col_data.py b/apps/tests/test_col_data.py
new file mode 100644
index 00000000..96d676af
--- /dev/null
+++ b/apps/tests/test_col_data.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from unittest import TestCase
+
+from apps.col_data import VietnameseWiktionary
+
+
+class TestColData(TestCase):
+    def test_1(self):
+        word = "a"
+        VietnameseWiktionary.get(word)