forked from madwind/flexget_qbittorrent_mod
-
Notifications
You must be signed in to change notification settings - Fork 0
/
html_rss.py
112 lines (101 loc) · 4.06 KB
/
html_rss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from urllib.parse import urljoin
from flexget import plugin
from flexget.entry import Entry
from flexget.event import event
from flexget.task import Task
from flexget.utils.soup import get_soup
from loguru import logger
from requests import RequestException
from .ptsites.utils import net_utils
class PluginHtmlRss:
    """FlexGet input plugin that turns an HTML listing page into entries.

    The page at ``url`` is downloaded through the task's request session,
    every node matching ``root_element_selector`` is treated as one candidate
    entry, and each configured field is read from a sub-element of that node.
    Only candidates that end up with both a ``title`` and a ``url`` are
    returned.
    """

    schema = {
        'type': 'object',
        'properties': {
            'url': {'type': 'string', 'format': 'url'},
            'user-agent': {'type': 'string'},
            'cookie': {'type': 'string'},
            'params': {'type': 'string'},
            "root_element_selector": {'type': 'string'},
            'fields': {
                'type': 'object',
                'properties': {
                    'title': {
                        'type': 'object',
                        'properties': {
                            'element_selector': {'type': 'string'},
                            'attribute': {'type': 'string'},
                        }
                    },
                    'url': {
                        'type': 'object',
                        'properties': {
                            'element_selector': {'type': 'string'},
                            'attribute': {'type': 'string'},
                        },
                    }
                },
                'required': ['title', 'url'],
            }
        },
        'required': ['url', 'root_element_selector'],
        'additionalProperties': False
    }

    def prepare_config(self, config: dict) -> dict:
        """Fill in a default for every optional key.

        The passed-in ``config`` dict is modified in place and also returned.
        String options default to ``''`` and ``fields`` defaults to ``{}``.
        """
        config.setdefault('url', '')
        config.setdefault('user-agent', '')
        config.setdefault('cookie', '')
        config.setdefault('params', '')
        config.setdefault('root_element_selector', '')
        config.setdefault('fields', {})
        return config

    @staticmethod
    def _extract_field(element, field: dict):
        """Read one configured field from ``element``.

        ``field`` is a ``{'element_selector': ..., 'attribute': ...}`` mapping.
        The special attribute name ``textContent`` selects the sub-element's
        text; any other name is looked up as an attribute of the sub-element.
        Returns ``''`` when the spec is incomplete, when no sub-element
        matches, or when the attribute is absent.
        """
        selector = field.get('element_selector')
        attribute = field.get('attribute')
        # The schema does not mark either key as required, so an incomplete
        # spec is treated as "no value" instead of raising KeyError.
        if not selector or not attribute:
            return ''
        sub_element = element.select_one(selector)
        if not sub_element:
            return ''
        if attribute == 'textContent':
            return sub_element.get_text()
        return sub_element.get(attribute, '')

    @staticmethod
    def _append_params(base_url: str, params: str) -> str:
        """Combine ``params`` with an already absolute ``base_url``.

        A value starting with ``&`` is appended verbatim; anything else
        (including the empty string) is resolved against ``base_url`` with
        ``urljoin``.
        """
        if params.startswith('&'):
            return base_url + params
        return urljoin(base_url, params)

    def on_task_input(self, task: Task, config: dict) -> list[Entry]:
        """Download the configured page and build one entry per matching node.

        :param task: running task; its ``requests`` session performs the
            download and has its headers and cookies updated.
        :param config: plugin configuration as described by ``schema``.
        :return: entries that have both a non-empty ``title`` and ``url``.
        :raises plugin.PluginError: if the download raises ``RequestException``.
        """
        config = self.prepare_config(config)
        url = config['url']
        user_agent = config.get('user-agent')
        cookie = config.get('cookie')
        root_element_selector = config.get('root_element_selector')
        fields = config['fields']
        params = config['params']

        headers = {'accept-encoding': 'gzip, deflate, br'}
        # Only override the User-Agent when one is configured; otherwise the
        # '' default from prepare_config would be written into the session
        # headers.
        if user_agent:
            headers['user-agent'] = user_agent

        entries: list[Entry] = []
        try:
            task.requests.headers.update(headers)
            task.requests.cookies.update(net_utils.cookie_str_to_dict(cookie))
            response = task.requests.get(url, timeout=60)
            content = net_utils.decode(response)
        except RequestException as e:
            # Chain the original exception so the root cause stays visible.
            raise plugin.PluginError(
                'Unable to download the Html for task {} ({}): {}'.format(task.name, url, e)
            ) from e

        elements = get_soup(content).select(root_element_selector)
        if not elements:
            logger.debug('no elements found in response: {}', content)
            return entries

        for element in elements:
            logger.debug('element in element_selector: {}', element)
            entry = Entry()
            for key, value in fields.items():
                entry[key] = self._extract_field(element, value)
                logger.debug('key: {}, value: {}', key, entry[key])
            # ``fields`` is optional in the schema, so either key may be
            # missing from the entry; .get avoids a KeyError in that case.
            title = entry.get('title')
            link = entry.get('url')
            if title and link:
                # The scraped link may be relative; resolve it against the
                # page URL before attaching the extra params.
                entry['url'] = self._append_params(urljoin(url, link), params)
                entry['original_url'] = entry['url']
                entries.append(entry)
        return entries
@event('plugin.register')
def register_plugin() -> None:
    """Register ``PluginHtmlRss`` under the name ``html_rss`` (plugin API version 2)."""
    plugin.register(
        PluginHtmlRss,
        'html_rss',
        api_ver=2,
    )