-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathterabox.py
133 lines (108 loc) · 3.87 KB
/
terabox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import re
from urllib.parse import parse_qs, urlparse
import requests
from config import COOKIE
from tools import get_formatted_size
def check_url_patterns(url):
"""
Check if a given URL matches predefined patterns.
"""
patterns = [
r"mirrobox\.com",
r"nephobox\.com",
r"freeterabox\.com",
r"1024tera\.com",
r"4funbox\.com",
r"terabox\.app",
r"terabox\.com",
r"momerybox\.com",
r"tibibox\.com",
]
return any(re.search(pattern, url) for pattern in patterns)
def extract_urls(string: str) -> list[str]:
"""
Extract valid URLs from a given string.
"""
pattern = r"(https?://\S+)"
urls = re.findall(pattern, string)
return [url for url in urls if check_url_patterns(url)]
def find_between(data: str, first: str, last: str) -> str | None:
"""
Extract text between two substrings.
"""
try:
start = data.index(first) + len(first)
end = data.index(last, start)
return data[start:end]
except ValueError:
return None
def extract_surl_from_url(url: str) -> str | None:
"""
Extract the 'surl' parameter from a URL.
"""
parsed_url = urlparse(url)
query_params = parse_qs(parsed_url.query)
return query_params.get("surl", [None])[0]
def fetch_data(url: str):
"""
Fetch and process data from a Terabox URL.
"""
session = requests.Session()
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9",
"Connection": "keep-alive",
"Cookie": COOKIE,
"DNT": "1",
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
),
}
# Make initial request
response = session.get(url, headers=headers)
if response.status_code != 200:
return {"error": "Failed to fetch initial URL"}
# Extract required parameters
default_thumbnail = find_between(response.text, 'og:image" content="', '"')
logid = find_between(response.text, "dp-logid=", "&")
jsToken = find_between(response.text, "fn%28%22", "%22%29")
bdstoken = find_between(response.text, 'bdstoken":"', '"')
shorturl = extract_surl_from_url(response.url)
if not shorturl or not logid or not jsToken:
return {"error": "Missing required parameters"}
api_url = (
f"https://www.terabox.app/share/list?app_id=250528&web=1&channel=0"
f"&jsToken={jsToken}&dp-logid={logid}&page=1&num=20&by=name&order=asc"
f"&shorturl={shorturl}&root=1"
)
# Fetch file list from API
response = session.get(api_url, headers=headers)
if response.status_code != 200:
return {"error": "Failed to fetch API data"}
try:
response_json = response.json()
except ValueError:
return {"error": "Invalid JSON response"}
if response_json.get("errno"):
return {"error": "API returned an error", "errno": response_json["errno"]}
file_list = response_json.get("list", [])
if not file_list:
return {"error": "No files found"}
file_info = file_list[0]
direct_link_response = session.head(file_info["dlink"], headers=headers)
# Compile result data
data = {
"file_name": file_info.get("server_filename"),
"link": file_info.get("dlink"),
"direct_link": direct_link_response.headers.get("location"),
"thumb": file_info.get("thumbs", {}).get("url3", default_thumbnail),
"size": get_formatted_size(file_info.get("size", 0)),
"sizebytes": file_info.get("size", 0),
}
return data
if __name__ == "__main__":
test_url = "https://teraboxapp.com/s/1GV28I3ea5rR_oR_f6LZGwQ"
result = fetch_data(test_url)
print(result)