This repository has been archived by the owner on Nov 11, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpacktSnatch.py
123 lines (112 loc) · 4.09 KB
/
packtSnatch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
'''
@author: TJ Nelson
Order and Download Tool for Packtpub Free Learning
'''
import ssl
import os
import shutil
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.poolmanager import PoolManager
import requests
import argparse
from bs4 import BeautifulSoup as bs4
# Command-line interface: both credentials are mandatory, the two action
# flags (--get / --download) are optional booleans.
parser = argparse.ArgumentParser(
    description='This program will download and order ebooks from http://packtpub.com/packt/offers/free-learning'
)
parser.add_argument(
    '-u', '--username',
    required=True,
    help='Username to log into Packtpub',
)
parser.add_argument(
    '-p', '--password',
    required=True,
    help='Password to log into Packtpub',
)
parser.add_argument(
    '--get',
    action='store_true',
    help='Gets the current free learning ebook download',
)
parser.add_argument(
    '--download',
    action='store_true',
    help='Downloads all of the ebooks you have in your account',
)
# Parsed at import time; the functions below read these module-level values.
args = parser.parse_args()

# Root of every site-relative URL built in this script (note the trailing slash).
base_url = 'https://www.packtpub.com/'

# Credentials sent in the login form by main().
EMAIL, PASSWORD = args.username, args.password
class MyAdapter(HTTPAdapter):
    """
    Transport adapter whose connection pool defaults to TLSv1.
    """

    def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
        """
        Supports the TLSv1

        Builds the urllib3 PoolManager used by this adapter.

        connections -- number of connection pools to cache (num_pools)
        maxsize     -- maximum number of connections kept per pool
        block       -- whether the pool blocks when no connection is free
        pool_kwargs -- any extra keyword arguments, forwarded to PoolManager
                       unchanged (mirrors HTTPAdapter.init_poolmanager)
        """
        # TLSv1 stays the default so existing behaviour is unchanged, but a
        # caller-supplied ssl_version now takes precedence instead of being
        # impossible to pass.
        # NOTE(review): ssl.PROTOCOL_TLSv1 is a deprecated protocol constant;
        # confirm the server still needs it pinned.
        pool_kwargs.setdefault('ssl_version', ssl.PROTOCOL_TLSv1)
        self.poolmanager = PoolManager(num_pools=connections,
                                       maxsize=maxsize,
                                       block=block,
                                       **pool_kwargs)
def getBooklist(packt):
    """
    Fetch the account's my-ebooks page and index the books listed on it.

    packt -- session object used for the request (main() passes a
             requests.Session); only its ``get`` method is called here.

    Returns a dict mapping each entry's ``nid`` attribute to its ``title``
    attribute with the last 8 characters removed.
    """
    library_page = packt.get(base_url + 'account/my-ebooks')
    parsed = bs4(library_page.content, "html.parser")
    catalogue = {}
    for entry in parsed.find_all('div', class_='product-line unseen'):
        # presumably the 8 trailing characters are a " [eBook]" style suffix
        # on the title -- TODO confirm against the live page markup
        catalogue[entry['nid']] = entry['title'][:-8]
    return catalogue
def downloadBook(packt, nid, value):
    """
    Downloads ebook to pdf by book id number

    packt -- session object; ``packt.get(url, stream=True)`` must return a
             response exposing ``raw`` (a readable stream) and ``close()``
    nid   -- book id as a string, inserted into the download URL
    value -- book title; used for the console messages and, with every '/'
             replaced by '-', as the file name under ``downloads/``

    Nothing is fetched when ``downloads/<title>.pdf`` already exists.
    Returns None.
    """
    url = 'https://www.packtpub.com/ebook_download/' + nid + '/pdf'
    name = value.replace('/', '-')
    target = os.path.join('downloads', name) + '.pdf'
    if os.path.isfile(target):
        print("Already have: " + value)
        return
    print("Downloading: " + value)
    # Do not depend on a caller having created the folder beforehand.
    if not os.path.isdir('downloads'):
        os.makedirs('downloads')
    # Stream into a scratch file first: an interrupted transfer must not
    # leave a truncated .pdf that later runs would report as "Already have".
    partial = target + '.part'
    response = packt.get(url, stream=True)
    try:
        # The raw stream is not content-decoded by default; ask it to undo
        # gzip/deflate so the bytes on disk are the actual document.
        response.raw.decode_content = True
        with open(partial, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
    finally:
        # Release the connection whether or not the copy succeeded.
        response.close()
    os.rename(partial, target)
def getEbook(packt):
    """
    Connects to Free Learning page
    Collects eBook title, description and link
    Orders the book if not already in users library

    packt -- session object used for every request (main() passes a
             requests.Session)

    Returns one of three status strings: the book is already in the
    library, the order request succeeded, or the order request failed.
    """
    offer_url = base_url + 'packt/offers/free-learning'
    print('Getting book info from: ' + offer_url)
    page = packt.get(offer_url)
    soup = bs4(page.content, "html.parser")
    book_title = soup.find('div', class_='dotd-title').h2.text.strip()
    # The description is located purely by position (the 173rd <div> on the
    # page). It is only informational, so fall back to an empty string
    # rather than abort the order when the page has fewer divs.
    # NOTE(review): the index 172 is tied to one specific page layout.
    divs = soup.find_all('div')
    book_description = divs[172].text.strip() if len(divs) > 172 else ''
    book_link = soup.find('a', class_='twelve-days-claim')['href'].strip()
    # base_url already ends with '/', so drop the link's leading slash; the
    # same URL is now both displayed and requested.
    claim_url = base_url + book_link.lstrip('/')
    print('----------------\nTitle: ' + book_title + '\nDescription: ' + book_description
          + '\nLink: ' + claim_url)
    # The second-to-last path segment of the claim link is compared with the
    # ids returned by getBooklist().
    if book_link.split('/')[-2] in getBooklist(packt):
        return "This book is already in your library"
    try:
        response = packt.get(claim_url)
        # A 4xx/5xx answer does not raise on its own; without this check a
        # failed claim would be reported as a success.
        response.raise_for_status()
    except requests.exceptions.RequestException:
        return "Error Ordering EBook... :("
    return "Ebook Ordered!"
def downloadEbooks(packt):
    """
    Creates a downloads folders then downloads books to folder

    packt -- session object handed on to getBooklist() and downloadBook()

    Returns a completion message string.
    Raises OSError when the downloads folder is missing and cannot be created.
    """
    print('Gathering book list for download...')
    downloads_dir = os.path.join(os.path.abspath('.'), 'downloads')
    try:
        os.makedirs(downloads_dir)
    except OSError:
        # An already existing folder is the expected case on repeat runs;
        # any other failure (e.g. permissions) must not be hidden.
        if not os.path.isdir(downloads_dir):
            raise
    # items() instead of iteritems(): same pairs, works on Python 2 and 3.
    for key, value in getBooklist(packt).items():
        downloadBook(packt, key, value)
    return "All books have been downloaded"
def main():
    """
    Script entry point.

    Opens a requests session with MyAdapter mounted for https://, requests
    the site root, posts the login form, then runs the action selected on
    the command line (--get is checked before --download) and prints the
    message it returns. Nothing further happens when neither flag was given.
    """
    with requests.Session() as packt:
        packt.mount('https://', MyAdapter())
        # Request the site root once before the login form is posted.
        packt.get(base_url)
        print('=== Logging in as user: ' + EMAIL + '\n')
        referer_header = {'Referer': 'http://www.packtpub.com'}
        form_fields = {
            'email': EMAIL,
            'password': PASSWORD,
            'op': 'Login',
            # NOTE(review): hard-coded form token -- confirm the site still accepts it.
            'form_build_id': 'form-792e6411b13910aa65d1b7ab8c561fd4',
            'form_id': 'packt_user_login_form',
        }
        packt.post(base_url, data=form_fields, headers=referer_header)
        if args.get:
            outcome = getEbook(packt)
        elif args.download:
            outcome = downloadEbooks(packt)
        else:
            # No action flag supplied: log in only.
            return
        print(outcome)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()