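# cableav.py
#
# A small scraper sketch for cableav.tv: it walks one or more listing pages,
# opens every linked video page, picks the highest-quality m3u8 stream it can
# find, and appends "title,m3u8" lines to test.txt. It assumes a local HTTP
# proxy at 127.0.0.1:7890; adjust or drop the `proxies` setting if you do not
# run one.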

import json
import re
from datetime import datetime
from random import randint
from time import sleep

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

FILE_PATH = './'  # directory the result file is written to
host = 'https://www.cableav.tv/'

# All requests go through a local HTTP proxy; change or remove as needed.
proxies = {
    'http': 'http://127.0.0.1:7890',
    'https': 'http://127.0.0.1:7890',
}

ua = UserAgent()
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "max-age=0",
    "dnt": "1",
    "referer": "https://cableav.tv/playlist/",
    "user-agent": ua.random,  # random user agent from fake_useragent
}

def open_page(url):
    # Fetch a page after a short random delay; retry up to three times on
    # network errors and return None if every attempt fails.
    for attempt in range(3):
        sleep(randint(1, 3))
        print('\n{} - [INFO]: requests at {}'.format(
            datetime.now().strftime("%Y-%m-%d %H:%M:%S"), url))
        try:
            # An explicit timeout is needed for the timeout/retry path to trigger.
            req = requests.get(url, headers=headers, proxies=proxies, timeout=30)
        except requests.exceptions.RequestException as e:
            print("Request failed (attempt {}): {}".format(attempt + 1, e))
            continue
        if req.status_code in (200, 304):
            req.encoding = 'utf-8'
            return req
    return None

def parse_playlist(html):
    # Yield the URL of every video linked from a listing page.
    if html is not None:
        page = BeautifulSoup(html.text, 'lxml')
        video_urls = page.select('div.listing-content > h3 > a')
        for i in video_urls:
            yield i.get('href')
    else:
        print("Result is None!\n")

def parse_video(html):
    # Extract the video title and the m3u8 URL of the best available quality.
    PATTERN_URL = r'.*"single_media_sources":(\[\{.*\}\])'
    if html is None:
        return
    page = BeautifulSoup(html.text, 'lxml')
    # Fallback m3u8 taken from the Open Graph meta tag.
    m3u8 = page.find("meta", {"property": "og:video:url"})["content"]
    video_tags = page.find_all("meta", {"property": "video:tag"})
    # Quality tags look like "480p" / "720p"; pick the highest resolution.
    best_quality = max(int(tag["content"][:-1]) for tag in video_tags)
    title = page.find("title").text.replace(' - CableAV', '')
    for line in html.text.split('\n'):
        match = re.match(PATTERN_URL, line)
        if match:
            # The captured value is a JSON array, so parse it with json.loads
            # instead of eval (json.loads also unescapes "\/" in the URLs).
            quality_lists = json.loads(match.group(1))
            for quality in quality_lists:
                if str(best_quality) in quality['source_label']:
                    m3u8 = quality['source_file']
                    break
    # return [title, m3u8]
    save_file(title, m3u8)

def save_file(title, m3u8):
    # Append one "title,m3u8" line to the result file.
    try:
        with open(FILE_PATH + 'test.txt', 'ab+') as f:
            result = '{},{}\r\n'.format(title, m3u8)
            f.write(result.encode('utf-8'))
    except IOError as e:
        print(e)

def run(url):
    # Scrape every video linked from one listing page.
    page = open_page(url)
    play_list = parse_playlist(page)
    for i in play_list:
        video_page = open_page(i)
        parse_video(video_page)

if __name__ == '__main__':
    while True:
        start_url = input("Input page URL: \n")
        page_num = int(input('Input page list num:\n'))
        if page_num <= 1:
            run(start_url)
        else:
            # Page 1 is the start URL itself; later pages live under "page/<n>/".
            run(start_url)
            urls = [start_url + "page/" + "{}/".format(x) for x in range(2, page_num + 1)]
            for url in urls:
                run(url)
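
# Example session (the listing URL is illustrative; the playlist page used as
# the referer above is one such listing):
#   Input page URL:
#   https://cableav.tv/playlist/
#   Input page list num:
#   3
# This fetches https://cableav.tv/playlist/ plus .../page/2/ and .../page/3/,
# appending one "title,m3u8" line per video to ./test.txt.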