-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathniceoppai.py
89 lines (73 loc) · 3.17 KB
/
niceoppai.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/python
import functools
import re

from lxml import etree as ET

from manga import Manga, App, urlretrieve, smart_cmp
class NiceOppai(Manga):
    """Manga subclass configured for http://www.niceoppai.net.

    Provides the URL templates, filename patterns and HTML-parsing helpers
    for that site. NOTE(review): the class-level templates are presumably
    consumed by the ``Manga`` base class -- confirm against ``manga.py``.
    """

    # URL templates, filled in by %-formatting with a mapping.
    SERIES_URL = '%(baseurl)s/%(series)s/'
    CHAPTER_URL = '%(baseurl)s/%(series)s/%(chapter_id)s/'
    PAGE_URL = '%(baseurl)s/%(series)s/%(chapter_id)s/%(page)s/'
    # Captures the last path component of a chapter link (digits, dots and
    # dashes) as ``chapter_id``; the link must end with a slash.
    CHAPTER_CRE = re.compile(r'.*/[^/]+/(?P<chapter_id>[0-9-.]+)/$')
    # Name templates for the chapter archive and the individual pages.
    CHAPTER_PATTERN = '%(series)s-%(chapter_id)03s.cbz'
    PAGE_PATTERN = '%(series)s-%(chapter_id)03s-%(page)03s'

    def __init__(self):
        """Initialise the base class with the site URL and set the
        ``raise404``/``raise503``/``raisebad`` urlopen arguments to True."""
        Manga.__init__(self, 'http://www.niceoppai.net')
        self.options['urlopen_args'].update({
            'raise404': True,
            'raise503': True,
            'raisebad': True,
        })

    def list_chapters(self, data):
        """Return all chapters of a series, sorted by chapter label.

        ``data`` is handed to ``self.get_series_url()`` to build the series
        URL. Chapters are collected from that page and from every page
        linked in the ``ul.pgg`` pagination list.

        Returns a list of dicts with the keys ``chapter_id``, ``chapter``
        and ``chapter_label``.
        """
        url = self.get_series_url(data)
        doc = ET.HTML(urlretrieve(url))
        chapters = self._list_chapters(doc)

        # A set de-duplicates pagination links that point at the same page;
        # anchors without an href are ignored.
        anchors = doc.xpath("//ul[@class='pgg']/li/a")
        page_urls = {a.get('href') for a in anchors if a.get('href')}
        for page_url in page_urls:
            chapters += self._list_chapters(ET.HTML(urlretrieve(page_url)))

        # smart_cmp is a cmp-style comparator; cmp_to_key keeps its ordering
        # while working on both Python 2.7 and Python 3, where list.sort()
        # no longer accepts a comparison function.
        label_key = functools.cmp_to_key(smart_cmp)
        chapters.sort(key=lambda entry: label_key(entry['chapter_label']))
        return chapters

    def _list_chapters(self, doc):
        """Extract the chapter entries from one parsed series page.

        Links under ``div#sct_content`` whose href does not match
        ``CHAPTER_CRE`` are skipped instead of raising ``AttributeError``.
        """
        chapters = []
        for link in doc.xpath("//div[@id='sct_content']/div/div/ul/li/a"):
            match = self.CHAPTER_CRE.match(link.get('href', ''))
            if match is None:
                # Not a chapter link (or no href at all): nothing to record.
                continue
            chapter_id = match.group('chapter_id')
            label = chapter_id.zfill(3)
            chapters.append({'chapter_id': chapter_id,
                             'chapter': label,
                             'chapter_label': label})
        return chapters

    def _list_pages(self, doc):
        """Return the distinct page labels of a chapter in reading order.

        Labels are the texts of the ``select.cbo_wpm_pag`` options. Numeric
        labels are ordered by value (so "10" follows "9" rather than "1");
        any non-numeric labels come after them in lexical order. Options
        without text are ignored.
        """
        options = doc.xpath("//select[@class='cbo_wpm_pag']/option")
        labels = {option.text for option in options if option.text}

        def reading_order(label):
            try:
                return (0, int(label), label)
            except ValueError:
                return (1, 0, label)

        return sorted(labels, key=reading_order)

    def _download_page(self, doc):
        """Return the page image URL with spaces percent-encoded.

        The URL is the ``src`` of the first ``div.prw > a > img`` element;
        ``IndexError`` is raised when the document has no such element.
        """
        src = doc.xpath("//div[@class='prw']/a/img")[0].attrib['src']
        return src.replace(' ', '%20')
class NiceOppaiApp(App):
    """App subclass that wires the command-line options to a NiceOppai
    scraper instance."""

    def __init__(self):
        """Run the base initialisation, copy the numeric id options into
        ``self.data`` and create the scraper."""
        App.__init__(self, chapter_func=str)
        # Both options default to ''; only non-empty values are converted
        # with int() and stored, series_id first, then chapter_id.
        for option_name in ('series_id', 'chapter_id'):
            raw_value = getattr(self.options, option_name)
            if raw_value:
                self.data.update({option_name: int(raw_value)})
        self.manga = NiceOppai()

    def _parse_args(self, parser):
        """Register --series_id and --chapter_id after the base options."""
        App._parse_args(self, parser)
        extra_options = (
            ('--series_id', 'series_id', 'Series ID'),
            ('--chapter_id', 'chapter_id', 'Chapter ID'),
        )
        for flag, dest, description in extra_options:
            parser.add_option(flag, dest=dest, default='', help=description)
if __name__ == '__main__':
    # Manual debugging recipes (written with Python 2 print statements).
    # Uncomment to call the scraper directly instead of going through the
    # command-line application:
    #import sys
    #mr = NiceOppai()
    #print mr.list_chapters({'series_id': 144, 'series': 'kekkaishi'})
    #print mr.list_pages({'series_id': 144, 'series': 'kekkaishi', 'chapter_id': 9422, 'chapter': 1})
    #mr.download_page({'series_id': 144, 'series': 'kekkaishi', 'chapter_id': 9422, 'chapter': 1, 'page': 1})
    #mr.download_chapter({'series_id': 144, 'series': 'kekkaishi', 'chapter_id': 9422, 'chapter': 1})
    #sys.exit(-1)

    # Build the command-line application and hand control to it.
    NiceOppaiApp().run()