-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproducthunt.py
66 lines (46 loc) · 1.75 KB
/
producthunt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from bs4 import BeautifulSoup
import requests
class ProductHunt(object):
    """Build producthunt.com product URLs and scrape topic tags and the tagline."""

    # Prefix that find_link() prepends to a product name.
    BASE_POST_URL = "https://www.producthunt.com/posts/"

    # Seconds to wait for the HTTP response; without a timeout requests.get()
    # may block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    # Exact `class` attribute values the scraper matches on.
    # NOTE(review): these look like build-generated class names and presumably
    # change when the site is redeployed - verify them if scraping comes back empty.
    TAG_SPAN_CLASS = (
        'font_9d927 grey_bbe43 xSmall_1a46e normal_d2e66 '
        'topic_ca358 button_53e93 uppercase_a49b4'
    )
    TAGLINE_CLASS = (
        'font_9d927 grey_bbe43 small_231df normal_d2e66 '
        'headerPostTagline_98494'
    )

    def __init__(self):
        pass

    def find_link(self, company_name):
        """
        Build the producthunt.com URL for a company/product name.

        The name is appended to the posts URL unchanged; no normalisation
        (e.g. of spaces, dots or underscores) is performed.

        args:
            company_name: String that specifies the company name/website.

        returns:
            String with the URL of the product page.
        """
        return self.BASE_POST_URL + company_name

    def scrape_product_page(self, link_to_page):
        """
        Scrape the topic tags and the tagline from a producthunt.com product page.

        args:
            link_to_page: A string with a link to the company's page on
                producthunt.com.

        returns:
            Dict with two keys:
                'tags': list of the `title` attribute of the anchor inside
                    each matching topic span (empty if none are found).
                'company_meta': the string of the tagline <h2>, or None if
                    that element is absent from the page.

        raises:
            Whatever requests.get() raises for connection problems or for
            exceeding REQUEST_TIMEOUT. The HTTP status code is not checked.
        """
        response = requests.get(link_to_page, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, 'html.parser')

        tags = []
        for span in soup.find_all('span', {'class': self.TAG_SPAN_CLASS}):
            anchor = span.a
            # Read the attribute from the parsed tag rather than slicing the
            # serialised HTML, which depended on the order of the attributes.
            title = anchor.get('title') if anchor is not None else None
            if title:
                tags.append(title)

        # The tagline is missing on error pages or after a markup change;
        # report None rather than failing on `.string`.
        tagline = soup.find('h2', {'class': self.TAGLINE_CLASS})
        company_meta = tagline.string if tagline is not None else None

        return {'tags': tags, 'company_meta': company_meta}
def main():
    """Scrape the hard-coded "clarke-ai" product page and print the result."""
    hunter = ProductHunt()
    link_to_page = hunter.find_link(company_name="clarke-ai")
    scraped = hunter.scrape_product_page(link_to_page)
    # Show whatever scrape_product_page() returned instead of binding it to
    # an unused local and dropping it.
    print(scraped)
# Run the demo scrape only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()