-
Notifications
You must be signed in to change notification settings - Fork 0
/
NU_Scraper.py
153 lines (145 loc) · 6.14 KB
/
NU_Scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import requests, lxml.html
import getpass
import sys
from bs4 import BeautifulSoup
from novel import Novel
from settings import Settings
import os
import time
def login():
    """Ask the user for NovelUpdates credentials on the terminal.

    The password is read through ``getpass`` so it is not echoed back.

    Returns:
        A ``(username, password)`` tuple holding the two strings entered.
    """
    entered_name = input("Enter your NovelUpdates Username: ")
    entered_secret = getpass.getpass("Enter your NovelUpdates Password: ")
    return entered_name, entered_secret
def scrapper(username, password):
    """Log in to NovelUpdates and return the HTML of the reading-list page.

    The login page is fetched first so that the hidden form fields it
    contains can be posted back together with the credentials.

    Args:
        username: NovelUpdates account name, sent as the ``log`` form field.
        password: NovelUpdates password, sent as the ``pwd`` form field.

    Returns:
        The body text of ``https://www.novelupdates.com/reading-list/``.

    Raises:
        SystemExit: if the login page does not answer with status 200, or
            if the login POST does not end up on the site's home page
            (treated as bad credentials).
        requests.exceptions.RequestException: network errors and timeouts
            are not caught here and propagate to the caller.
    """
    login_url = 'https://www.novelupdates.com/login/'
    # Every request gets a timeout: without one, requests waits forever on
    # a stalled connection, which would freeze the scheduled loop.
    request_timeout = 5

    print("\tLogging in to NovelUpdates")
    # The context manager closes the session (and its pooled connections)
    # on every exit path, including sys.exit().
    with requests.session() as s:
        response = s.get(login_url, timeout=request_timeout)
        if response.status_code != 200:
            print("Failure to access login page")
            sys.exit()

        login_html = lxml.html.fromstring(response.text)
        hidden_inputs = login_html.xpath(r'//form//input[@type="hidden"]')
        # Echo the hidden fields back. Inputs without a name cannot be
        # submitted, and a missing value is sent as an empty string.
        form = {
            x.attrib["name"]: x.attrib.get("value", "")
            for x in hidden_inputs
            if "name" in x.attrib
        }
        form['log'] = username
        form['pwd'] = password

        response = s.post(login_url, data=form, timeout=request_timeout)
        # Landing on the home page after the POST is the success signal.
        if response.url != 'https://www.novelupdates.com/':
            print("Error: Unable to log on, please check credentials")
            sys.exit()

        print("\t ===Login Sucess===")
        reading_list = s.get('https://www.novelupdates.com/reading-list/',
                             timeout=request_timeout)
        return reading_list.text
def html_parse(html, fileHandle):
    """Extract the reading list from the NovelUpdates reading-list HTML.

    Each novel is a ``<tr class="rl_links">`` row whose ``data-title``
    attribute holds the title. The ``<a class="chp-release">`` anchors are
    consumed two per row, in document order: the first is used as the
    current chapter and the second as the latest chapter.

    Args:
        html: HTML text of the reading-list page.
        fileHandle: when truthy, return the parsed novels; when falsy,
            write them to ``readingList.txt`` instead (the offline reading
            list is a work-in-progress feature).

    Returns:
        A list of ``Novel`` objects when ``fileHandle`` is truthy,
        otherwise ``None``.

    Raises:
        SystemExit: if the number of chapter anchors is odd or too small
            to give every title two anchors.
    """
    soup = BeautifulSoup(html, 'html.parser')
    print("\t Parsing Reading List")
    print(" ====================================")
    title_names = soup.find_all("tr", attrs={"class": "rl_links"})
    chapters = soup.find_all("a", attrs={"class": "chp-release"})

    # Anchors must pair up, and there must be a pair for every title;
    # otherwise the pairing below would run off the end of the list.
    if (len(chapters) % 2) != 0 or len(chapters) < 2 * len(title_names):
        print("Error: Incorrect number of chapters")
        sys.exit()

    # Even-indexed anchors are current chapters, odd-indexed are latest.
    rows = [
        (row.attrs["data-title"], current.get_text(), latest.get_text())
        for row, current, latest
        in zip(title_names, chapters[0::2], chapters[1::2])
    ]

    if fileHandle:
        return [Novel(title, latest_chapter, current_chapter)
                for title, current_chapter, latest_chapter in rows]

    # One "title,current,latest" line per novel. Fields are not quoted, so
    # commas inside a title end up in the file as-is.
    with open("readingList.txt", "w") as saveFile:
        for title, current_chapter, latest_chapter in rows:
            saveFile.write(
                title + "," + current_chapter + "," + latest_chapter + "\n")
    print("The save file,'readingList.txt' has been created...")
def fileReader():
    """Load the reading list saved in ``readingList.txt``.

    Each non-empty line is expected to be ``title,current,latest``, the
    layout html_parse() writes. The line is split from the right so that a
    title containing commas stays in one piece.

    Returns:
        A list of ``Novel`` objects, one per non-empty line.

    Raises:
        SystemExit: if the save file is missing or a line does not contain
            three comma-separated fields.
    """
    if not os.path.isfile("readingList.txt"):
        print("Error: Save file, 'readingList.txt' is missing")
        sys.exit()

    readingList = []
    with open("readingList.txt", "r") as saveFile:
        for line in saveFile:
            # Strip only the line terminator; a final line without one
            # must keep its last character.
            line = line.rstrip("\n")
            if not line:
                continue
            fields = line.rsplit(",", 2)
            if len(fields) != 3:
                print("Error: Malformed line in 'readingList.txt': " + line)
                sys.exit()
            title, current_chapter, latest_chapter = fields
            # html_parse() writes current before latest but constructs
            # Novel(title, latest, current); use the same argument order.
            # NOTE(review): confirm against Novel's constructor.
            readingList.append(Novel(title, latest_chapter, current_chapter))
    return readingList
def newChapters(reading_list):
    """Print the titles of the novels that report a new update.

    Every entry is asked through ``new_update()``; the titles of those that
    answer truthy are printed as a zero-based numbered list, preceded by a
    line stating how many there are.

    Args:
        reading_list: iterable of objects exposing ``new_update()`` and
            ``title``.

    Returns:
        Always ``0``.
    """
    pending = [entry.title for entry in reading_list if entry.new_update()]
    print("There are {} new updates...".format(len(pending)))
    for position, title in enumerate(pending):
        print(" " + str(position) + " - " + title)
    return 0
def printAll(reading_list):
    """Print a heading, then let every entry print itself.

    Args:
        reading_list: iterable of objects exposing ``printNovel()``.
    """
    print("\n\tCurrent Reading List:")
    for entry in reading_list:
        entry.printNovel()
def _check_for_updates(username, password):
    """Run one cycle: log in, parse the reading list, report new chapters."""
    html_string = scrapper(username, password)
    # fileHandle=True makes html_parse return the parsed list rather than
    # writing 'readingList.txt'.
    readingList = html_parse(html_string, fileHandle=True)
    newChapters(readingList)


def _is_scheduled_hour(config):
    """Return True when the current local hour, as a string, is in config.times."""
    currentHour = time.localtime(time.time()).tm_hour
    return str(currentHour) in config.times


def main():
    """Entry point: pick a run mode from Settings and check for updates.

    Modes, driven by the Settings attributes read below:
      * ``prompt`` falsy: credentials come from ``config.initialization()``;
        one check runs immediately, then, if ``scheduled``, the hour is
        re-tested every 3600 seconds.
      * ``prompt`` truthy, ``offload`` falsy: credentials are asked for on
        the terminal, one check runs, and the program exits.
      * ``prompt`` and ``offload`` truthy: credentials are read from the
        settings; either one check runs and the program exits, or, if
        ``scheduled``, the hour is re-tested every ``config.pause`` seconds.

    The scheduled loops end on Ctrl-C. Every path finishes with
    ``sys.exit()`` or loops until interrupted, so nothing is repeated from
    the top.
    """
    config = Settings()

    if config.prompt:
        if not config.offload:
            username, password = login()
            _check_for_updates(username, password)
            sys.exit()

        username = config.username
        password = config.password
        if not config.scheduled:
            _check_for_updates(username, password)
            sys.exit()

        while True:
            try:
                print("Begining to Scheduled Action")
                # NOTE(review): the original comment says this pauses for
                # an hour; the actual length is whatever config.pause is.
                time.sleep(config.pause)
                if _is_scheduled_hour(config):
                    _check_for_updates(username, password)
                    print('\n')
            except KeyboardInterrupt:
                print("\nEnding program")
                sys.exit()
    else:
        username, password = config.initialization()
        _check_for_updates(username, password)
        if not config.scheduled:
            sys.exit()

        print("Begining to Scheduled Action")
        while True:
            try:
                time.sleep(3600)  # pauses for an hour
                # will change to add options of changing the settings
                if _is_scheduled_hour(config):
                    _check_for_updates(username, password)
            except KeyboardInterrupt:
                print("\nEnding program")
                sys.exit()


if __name__ == '__main__':
    main()