-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
73 lines (53 loc) · 1.89 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os
import shutil
import requests
from tqdm import tqdm
from bs4 import BeautifulSoup
# Root of the Stock Exchange of Thailand (SET) site; company-page hrefs
# scraped below are relative and get prefixed with this.
BaseCompanyURL = "https://www.set.or.th"
# Listing endpoint: appending a prefix letter returns the table of all
# companies whose symbol starts with that letter (Thai locale).
BaseURL = "https://www.set.or.th/set/commonslookup.do?language=th&country=TH&prefix="
# Every prefix page the lookup endpoint serves: "NUMBER" (symbols starting
# with a digit) plus A-Z.
AllPrefixes = ["NUMBER", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K",
"L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
def CleanUpDirectory():
    """Remove the working directories from a previous run, if present.

    Deletes ``data`` and ``extracted_data`` (and all their contents)
    relative to the current working directory. A directory that does
    not exist is silently skipped.
    """
    for directory in ("data", "extracted_data"):
        if os.path.isdir(directory):
            shutil.rmtree(directory)
def InitDirectory():
    """Create the working directories ``data`` and ``extracted_data``.

    Uses ``exist_ok=True`` so a leftover directory (e.g. if
    ``CleanUpDirectory`` was skipped or partially failed) does not
    raise ``FileExistsError`` — the original ``os.mkdir`` would crash
    in that case.
    """
    os.makedirs("data", exist_ok=True)
    os.makedirs("extracted_data", exist_ok=True)
def GetAllCompany():
    """Crawl every SET lookup prefix page and download each company's zip.

    For each prefix page, every ``<a>`` tag found inside a ``<td>`` cell
    is treated as a company link: its absolute URL is recorded and
    ``DownloadZip`` is called on it.

    Returns:
        list[str]: absolute URLs of all company pages visited.
        (Bug fix: the original built this list but never appended to
        it, so it always returned ``[]``.)
    """
    company = []
    for prefix in AllPrefixes:
        url = BaseURL + prefix
        r = requests.get(url)
        soup = BeautifulSoup(r.content, "html.parser")
        tdTags = soup.find_all("td")
        for tdTag in tdTags:
            aTags = tdTag.find_all("a")
            for aTag in aTags:
                companyURL = BaseCompanyURL + aTag['href']
                company.append(companyURL)
                DownloadZip(companyURL)
    return company
def DownloadZip(company: str):
    """Fetch a company page, find its financial-statement link, download the zip.

    Args:
        company: absolute URL of a SET company detail page.

    The company name is taken from the first whitespace-separated token
    of the first ``div.col-xs-12.col-md-12.col-lg-8`` element. The last
    ``<a>`` whose text contains 'งบไตรมาส' (quarterly statement) or
    'งบปี' (annual statement) wins; its href is downloaded to
    ``data/<company_name>.zip``.

    Raises:
        Exception: if no statement link is found on the page.
    """
    print(company)
    r = requests.get(company)
    soup = BeautifulSoup(r.content, "html.parser")
    company_name = soup.find_all(
        "div", {'class': 'col-xs-12 col-md-12 col-lg-8'})[0].text.split()[0]
    aTags = soup.find_all("a")
    # None sentinel instead of False: a href string is never None, so the
    # check below is unambiguous.
    downloadURL = None
    for aTag in aTags:
        if 'งบไตรมาส' in aTag.text or 'งบปี' in aTag.text:
            downloadURL = aTag["href"]
    if downloadURL is not None:
        print(company_name + " : " + downloadURL)
        response = requests.get(downloadURL, stream=True)
        with open("data/" + company_name + ".zip", "wb") as handle:
            # Bug fix: iter_content() with no argument yields ONE BYTE per
            # iteration, making downloads pathologically slow; stream in
            # 8 KiB chunks instead.
            for data in tqdm(response.iter_content(chunk_size=8192)):
                handle.write(data)
    else:
        raise Exception(company_name + " : " + "File Url Not Found")
def main():
    """Entry point: reset the working directories, then crawl and download."""
    CleanUpDirectory()
    InitDirectory()
    GetAllCompany()


# Guard the entry point so importing this module does not immediately
# delete directories and start scraping the network (the original called
# main() unconditionally at import time).
if __name__ == "__main__":
    main()