Skip to content

Commit

Permalink
wikipedia; remove superfluous elements
Browse files Browse the repository at this point in the history
  • Loading branch information
hunterirving committed Sep 6, 2024
1 parent a655c92 commit a623927
Showing 1 changed file with 20 additions and 0 deletions.
20 changes: 20 additions & 0 deletions extensions/wikipedia/wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,26 @@ def handle_wiki_page(title):
for div in content_div.find_all('div', class_='refbegin'):
div.decompose()

# Remove divs with class "quotebox"
for div in content_div.find_all('div', class_='quotebox'):
div.decompose()

#remove tables with class "sidebar"
for table in soup.find_all('table', class_='sidebar'):
table.decompose()

#remove tables with class "wikitable"
for table in soup.find_all('table', class_='wikitable'):
table.decompose()

#remove tables with class "mw-collapsible"
for table in soup.find_all('table', class_='mw-collapsible'):
table.decompose()

#remove ul with class "gallery"
for ul in soup.find_all('ul', class_='gallery'):
ul.decompose()

# Remove <link> tags
for link in content_div.find_all('link'):
link.decompose()
Expand Down

0 comments on commit a623927

Please sign in to comment.