-
Notifications
You must be signed in to change notification settings - Fork 0
/
quora_answer.py
126 lines (111 loc) · 3.86 KB
/
quora_answer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
#from bs4 import BeautifulSoup
import urllib.parse
import time
import pickle
import logging
options = Options()
options.add_argument('-headless')
logger = logging.getLogger('quora_answer')
logger.setLevel(logging.DEBUG)
ch_logger = logging.StreamHandler()
ch_logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch_logger.setFormatter(formatter)
logger.addHandler(ch_logger)
delay = 3
def search_qn(driver, query):
link = f'https://www.quora.com/search?q={urllib.parse.quote_plus(query)}&type=question'
driver.get(link)
elements = driver.find_elements_by_xpath("//div/a[@class='question_link']")
return [element.get_attribute("href") for element in elements]
def get_answer(query):
driver = webdriver.Chrome(options=options)
driver.implicitly_wait(10)
attempts = 1
links = None
while attempts < 10:
logger.info(f'Attempt {attempts}/10 of getting links to answers')
try:
links = search_qn(driver, query)
#print("Retrieved links")
logger.info("Retrieved links to answers")
break
except Exception as e:
logger.info(f'Failed attempt {attempts}')
logger.error(f'{e}')
#print("Error:", e)
#tb = traceback.format_exc()
#print(tb)
attempts += 1
time.sleep(3)
if links is None:
# ran out all attempts
logger.error("Failed to retrieve links")
#print("Failed to retrieve links from quora.com")
driver.quit()
return "FAIL", ""
for link in links:
logger.info(f'Checking {link}')
# make sure is still on quora
if link[:22] == "https://www.quora.com/":
driver.get(link)
else:
logger.warning(f"Link {link} is not on Quora.com!")
continue
try:
answer_count = driver.find_element_by_xpath("//div/div[contains(@class, 'answer_count')]")
if answer_count.text == "0 Answers":
logger.info(f"No answers found on {link}")
continue
logger.info(f'{answer_count.text} Found')
except Exception as e:
# something went wrong
logger.error("Cannot retrieve answer count")
logger.error(f'{e}')
#tb = traceback.format_exc()
#print(tb)
continue
for _ in range(3):
try:
# try to click to view more
driver.find_element_by_xpath("//a[contains(@class, 'ui_qtext_more_link')]").click();
driver.find_element_by_xpath("//span[.='Continue Reading']").click();
logger.info("Cliked to view whole answer")
break
except Exception as e:
try:
driver.find_element_by_xpath("//span[.='Continue Reading']").click();
logger.info("Cliked to view whole answer")
break
except Exception as e:
# maybe dont have to click
logger.warning("Could not click to view more")
#logger.info("No need to click to view more")
#time.sleep(1)
#soup = BeautifulSoup(driver.page_source, 'html.parser')
try:
# try to return answer text
answer_text = driver.find_element_by_xpath("//div[contains(@class, 'ui_qtext_expanded')]").text
driver.quit()
return answer_text, link
except Exception as e:
# something went wrong
driver.save_screenshot("screenshot.png")
logger.error("Could not get answer")
logger.error(f'{e}')
#tb = traceback.format_exc()
#print(tb)
continue
# whyyy :'(
logger.error("Failed to retrieve any answer")
driver.quit()
return "FAIL :'(", ""
if __name__=="__main__":
query = input(">>> ")
print(get_answer(query))