From a6239275ee717b5157817e46e2d8f6493051b695 Mon Sep 17 00:00:00 2001 From: Hunter Date: Fri, 6 Sep 2024 14:26:37 -0400 Subject: [PATCH] wikipedia; remove superfluous elements --- extensions/wikipedia/wikipedia.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/extensions/wikipedia/wikipedia.py b/extensions/wikipedia/wikipedia.py index a87e04f..f3627ae 100644 --- a/extensions/wikipedia/wikipedia.py +++ b/extensions/wikipedia/wikipedia.py @@ -211,6 +211,26 @@ def handle_wiki_page(title): for div in content_div.find_all('div', class_='refbegin'): div.decompose() + # Remove divs with class "quotebox" + for div in content_div.find_all('div', class_='quotebox'): + div.decompose() + + #remove tables with class "sidebar" + for table in soup.find_all('table', class_='sidebar'): + table.decompose() + + #remove tables with class "wikitable" + for table in soup.find_all('table', class_='wikitable'): + table.decompose() + + #remove tables with class "mw-collapsible" + for table in soup.find_all('table', class_='mw-collapsible'): + table.decompose() + + #remove ul with class "gallery" + for ul in soup.find_all('ul', class_='gallery'): + ul.decompose() + # Remove tags for link in content_div.find_all('link'): link.decompose()