-
Notifications
You must be signed in to change notification settings - Fork 0
/
FragGabiEntry.py
35 lines (31 loc) · 1.36 KB
/
FragGabiEntry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from Question import Question
import requests
from bs4 import BeautifulSoup
class FragGabiEntry:
    """
    Scraper for the Frag Gabi Section on maedchen.de

    After construction, ``question`` holds the scraped question (content,
    title, author and date) and ``answer`` holds the text of the expert's
    answer for the given page.
    """
    # Class-level default; replaced on the instance by scrape_site().
    answer = ""

    def __init__(self, url):
        """
        On object creation directly initiate scraping of given site

        :param url: URL to a question on maedchen.de (Format like https://www.maedchen.de/love/frag-gabi/<something>)
        """
        # Every entry needs its own Question object. A single Question stored
        # on the class would be shared by all instances, so scraping a second
        # page would overwrite the data of every previously created entry.
        self.question = Question()
        self.scrape_site(url)

    def scrape_site(self, url, timeout=30):
        """
        Request site and extract contents

        Populates ``self.question`` (content, title, author, date) and
        ``self.answer`` from the page's HTML.

        :param url: URL to a question on maedchen.de (Format like https://www.maedchen.de/love/frag-gabi/<something>)
        :param timeout: Seconds to wait for the server before giving up;
            without a timeout the request could block indefinitely
        :return: True on success
        :raises IndexError: if one of the expected page elements is missing
        """
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.text, 'html.parser')

        question_content_raw = soup.find_all(class_='question-detail')[0].get_text().strip()
        # The detail text is split on triple newlines: the first part is the
        # question body, the second part is the author/date line.
        sections = question_content_raw.split('\n\n\n')

        self.question.content = sections[0].strip()
        self.question.title = soup.find_all(class_='question-header')[0].get_text().strip()

        # Drop the first four characters of the author/date line before
        # splitting it on ' / ' (presumably a fixed prefix such as "von " --
        # TODO confirm against the live markup).
        author_date = sections[1].strip()[4:].split(' / ')
        self.question.author = author_date[0]
        self.question.set_date(author_date[1])

        self.answer = soup.find_all(class_='question-answers__content--expert')[0].get_text(separator='\n')
        return True