-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproduction_paradise.py
41 lines (35 loc) · 1023 Bytes
/
production_paradise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from bs4 import BeautifulSoup
import requests
import re
# Scrape the Production Paradise member search and print the text of every
# result entry (<div class="data">) found on the first results page.
req_url = "http://www.productionparadise.com/search/index.php?t=members&country_id=&category_id=&q="

# Without an explicit timeout requests waits indefinitely on a stalled server.
r = requests.get(req_url, timeout=30)
data = r.text
soup = BeautifulSoup(data, 'lxml')

# Capture the few characters that sit just before the first "nextprev" link
# inside the paginator markup.  The value is not consumed anywhere yet; it is
# kept for the pagination work described in the TODO below.
pages = str(soup.find_all('div', {"class": "paginator"}))
marker = pages.find('a class="nextprev"')
if marker == -1:
    # str.find returns -1 when the link is absent; slicing with that offset
    # would silently return unrelated characters from the end of the string.
    pages = ""
else:
    pages = pages[max(marker - 10, 0):max(marker - 1, 0)]

# TODO: walk every result page (req_url + "&page=" + str(i)) instead of only
# the first one, and hand each row's non-empty lines (name, phone, website)
# to a Writer rather than printing the raw text.

# The parsed document from the request above is reused here; fetching the
# same URL a second time would only repeat the identical download and parse.
for row in soup.find_all('div', attrs={"class": "data"}):
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(row.text)