Skip to content

Commit

Permalink
GH-485: add wiktionary
Browse files Browse the repository at this point in the history
  • Loading branch information
anh.vu2 committed Sep 15, 2021
1 parent b293f8b commit 96edecc
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 0 deletions.
17 changes: 17 additions & 0 deletions apps/col_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import requests
from bs4 import BeautifulSoup


class Dictionary:
Expand Down Expand Up @@ -32,5 +34,20 @@ def save(self, word, data):
self.es.index(index=self.es_index, id=word, body=data)


class VietnameseWiktionary:
    """Fetch entry pages from the Vietnamese Wiktionary.

    Site: https://vi.wiktionary.org/
    """

    # Every entry lives at <BASE_URL><percent-encoded page title>.
    BASE_URL = "https://vi.wiktionary.org/wiki/"
    # Seconds to wait for the server; requests waits indefinitely by default.
    TIMEOUT = 10

    @staticmethod
    def get(word):
        """Download and parse the Wiktionary page for *word*.

        Args:
            word: Headword to look up, e.g. ``"ban đầu"``.

        Returns:
            BeautifulSoup: The parsed HTML of the entry page.

        Raises:
            ValueError: If *word* is empty or contains only whitespace.
            requests.RequestException: On connection failure, timeout, or a
                non-success HTTP status (for example a missing entry).
        """
        from urllib.parse import quote

        if not word or not word.strip():
            raise ValueError("word must be a non-empty string")

        # Wiki page titles use underscores in place of spaces; the rest is
        # percent-encoded as UTF-8. No "#section" fragment is appended because
        # fragments are never sent to the server.
        title = word.strip().replace(" ", "_")
        url = VietnameseWiktionary.BASE_URL + quote(title, safe="")

        response = requests.get(url, timeout=VietnameseWiktionary.TIMEOUT)
        response.raise_for_status()

        # TODO: extract the senses from "#mw-content-text .mw-parser-output";
        # for now the whole parsed page is handed back to the caller.
        return BeautifulSoup(response.content, "html.parser")


if __name__ == '__main__':
dictionary = Dictionary(es_index='dictionary')
Empty file.
1 change: 1 addition & 0 deletions apps/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
beautifulsoup4==4.9.3
10 changes: 10 additions & 0 deletions apps/tests/test_col_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from unittest import TestCase

from apps.col_data import VietnameseWiktionary


class TestColData(TestCase):
    """Smoke tests for the collectors defined in ``apps.col_data``."""

    def test_1(self):
        """Call ``VietnameseWiktionary.get`` once; passes if nothing is raised.

        No assertion is made on the result of the call.
        """
        VietnameseWiktionary.get("a")

0 comments on commit 96edecc

Please sign in to comment.