-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmax_path.py
282 lines (241 loc) · 13.8 KB
/
max_path.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
import sys, os, traceback
import pathlib
from pathlib import PurePath
from dateutil.parser import parse
from termcolor import cprint
import colorama
from colorama import Fore
BOLD_ONLY = ['bold']
colorama.init() # Windows need this
HIGHER_RED = Fore.LIGHTRED_EX
import platform
plat = platform.system().lower()
if ('window' in plat) or plat.startswith('win'):
# Darwin should treat as Linux
IS_WIN = True
# https://stackoverflow.com/questions/16755142/how-to-make-win32-console-recognize-ansi-vt100-escape-sequences
# Even though ANSI escape sequence can be enable via `REG ADD HKCU\CONSOLE /f /v VirtualTerminalLevel /t REG_DWORD /d 1`
# But since this is not big deal to hide logo testing for this project, so no need.
ANSI_CLEAR = '\r' # Default cmd settings not support ANSI sequence
ANSI_END_COLOR = ''
ANSI_BLUE = ''
else:
IS_WIN = False
ANSI_CLEAR = '\r\x1b[0m\x1b[K'
ANSI_END_COLOR = '\x1b[0m\x1b[K'
ANSI_BLUE = '\x1b[1;44m'
# MAX_PATH 260 need exclude 1 terminating null character <NUL>
# if prefix \\?\ + abspath to use Windows extented-length(i.e. in my case, individual filename/dir can use full 259, no more 259 is fit full path), then the MAX_PATH is 259 - 4 = 255
#[DEPRECATED] no more 259 since always -el now AND Windows 259 - \\?\ == 255 normal Linux
WIN_MAX_PATH = 255 # MAX_PATH 260 need exclude 1 terminating null character <NUL>
# https://stackoverflow.com/a/2556252/1074998
#def rreplace(s, old, new, occurrence):
# li = s.rsplit(old, occurrence)
# return new.join(li)
# https://stackoverflow.com/questions/16870663
def validate_date(date_text):
try:
parse(date_text)
except ValueError:
raise ValueError("Incorrect data format, should be YYMMDD")
return True
def sanitize(path):
# trim multiple whitespaces # ".." is the responsibilities of get max path
# Use PurePath instead of os.path.basename https://stackoverflow.com/a/31273488/1074998 , e.g.:
#>>> PurePath( '/home/iced/..'.replace('..', '') ).parts[-1] # get 'iced'
#>>> os.path.basename('/home/iced/..'.replace('..', '')) # get empty ''
# Ensure .replace('..', '') is last replacement before .strip() AND not replace back to dot '.'
# https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
# [todo:0] Handle case sensitive and reserved file names in Windows like Chrome "Save page as" do
# For portable to move filename between linux <-> win, should use IS_WIN only (but still can't care if case sensitive filename move to case in-sensitive filesystem).
# IS_WIN:
path = path.replace('<', '').replace('>', '').replace('"', '\'').replace('?', '').replace('*', '').replace('/', '_').replace('\\', '_').replace('|', '_').replace(':', '_').replace('.', '_').strip()
# Linux:
#path.replace('/', '|').replace(':', '_').replace('.', '_').strip()
# Put this after replace patterns above bcoz 2 distinct spaces may merge together become multiple-spaces, e.g. after ' ? ' replace to ' '
# If using .replace(' ', ' ') will only replace once round, e.g. ' ' become
path = ' '.join(path.split())
p = PurePath( path )
if p.parts:
return p.parts[-1]
else:
return ''
# The filesystem limits is 255(normal) , 242(docker) or 143((eCryptfs) bytes
# So can't blindly [:] slice without encode first
# And need decode back after slice
# And to ensure mix sequence byte in UTF-8 and work
#, e.g. abc𪍑𪍑𪍑
# , need try/catch to skip next full bytes of "1" byte ascii" OR "3 bytes 我" or "4 bytes 𪍑"
# ... by looping 4 bytes(UTF-8 max) from right to left
# HTML5 forbid UTF-16, UTF-16/32 not encourage to use in web page
# So only encode/decode in utf-8
# https://stackoverflow.com/questions/13132976
# https://stackoverflow.com/questions/50385123
# https://stackoverflow.com/questions/11820006
def get_max_path(arg_cut, fs_f_max, fpart_excluded_immutable, immutable):
#print('before f: ' + fpart_excluded_immutable)
if arg_cut >= 0:
fpart_excluded_immutable = fpart_excluded_immutable[:arg_cut]
if immutable:
# immutable shouldn't limit to 1 byte(may be change next time or next project), so need encode also
immutable_len = len(immutable.encode('utf-8'))
else:
immutable_len = 0
space_remains = fs_f_max - immutable_len
if space_remains < 1:
return '' # No more spaces to trim(bcoz directories name too long), so only shows PinID.jpg
# range([start], stop[, step])
# -1 step * 4 loop = -4, means looping 4 bytes(UTF-8 max) from right to left
for gostan in range(space_remains, space_remains - 4, -1):
try:
fpart_excluded_immutable = fpart_excluded_immutable.encode('utf-8')[: gostan ].decode('utf-8')
break # No break still same result, but waste
except UnicodeDecodeError:
pass #print('Calm down, this is normal: ' + str(gostan) + ' f: ' + fpart_excluded_immutable)
#print('after f: ' + fpart_excluded_immutable)
# Last safety resort, in case any bug:
fpart_excluded_immutable_base = sanitize ( fpart_excluded_immutable )
if fpart_excluded_immutable_base != fpart_excluded_immutable.strip(): # Original need strip bcoz it might cut in space
cprint(''.join([ HIGHER_RED, '\n[! A] Please report to me which Link/scenario it print this log.\
Thanks:\n{} # {} # {} # {} # {}\n\n'
.format(arg_cut, fs_f_max, repr(fpart_excluded_immutable), repr(fpart_excluded_immutable_base), immutable) ]), attrs=BOLD_ONLY, end='' )
return fpart_excluded_immutable_base
def get_output_file_path(arg_cut, fs_f_max, pre_immutable, human_fname, save_dir):
immutable = sanitize(str(pre_immutable))
if not immutable.strip(): # Ensure add hard-coded extension to avoid empty id and leave single dot in next step
raise('pre_immutable empty')
fpart_excluded_ext_before = sanitize( human_fname )
#print( 'get output f:' + repr(fpart_excluded_ext_before) )
fpart_excluded_ext = get_max_path(arg_cut, fs_f_max, fpart_excluded_ext_before
, immutable)
if fpart_excluded_ext:
if fpart_excluded_ext_before == fpart_excluded_ext: # means not truncat
# Prevent confuse when trailing period become '..'ext and looks like '...'
if fpart_excluded_ext[-1] == '.':
fpart_excluded_ext = fpart_excluded_ext[:-1]
else: # Truncated
# No need care if two/three/... dots, overkill to trim more and loss information
if fpart_excluded_ext[-1] == '.':
fpart_excluded_ext = fpart_excluded_ext[:-1]
fpart_excluded_ext = get_max_path(arg_cut, fs_f_max, fpart_excluded_ext
, '...' + immutable)
fpart_excluded_ext = fpart_excluded_ext + '...'
return fpart_excluded_ext
'''
# To make final output path consistent with IS_WIN's abspath above, so also do abspath here:
# (Please ensure below PurePath's file_path checking is using abspath if remove abspath here in future)
file_path = os.path.abspath( os.path.join(save_dir, '{}'.format( fpart_excluded_ext)) ) # + immutable
#if '111' in file_path:
# print('last fp: ' + file_path + ' len: ' + str(len(file_path.encode('utf-8'))))
try:
# Note this is possible here if only . while the rest is empty, e.g. './.'
# But better throws and inform me if that abnormal case.
if PurePath(os.path.abspath(save_dir)).parts[:] != PurePath(file_path).parts[:-1]:
cprint(''.join([ HIGHER_RED, '\n[! B] Please report to me which Link/scenario it print this log.\
Thanks: {} # {} # {} # {} # {} \n\n'
.format(arg_cut, fs_f_max, fpart_excluded_ext + immutable, save_dir, file_path) ]), attrs=BOLD_ONLY, end='' )
file_path = os.path.join(save_dir, '{}'.format( sanitize(fpart_excluded_ext + immutable)))
if PurePath(os.path.abspath(save_dir)).parts[:] != PurePath(file_path).parts[:-1]:
cprint(''.join([ HIGHER_RED, '\n[! C] Please report to me which Link/scenario it print this log.\
Thanks: {} # {} # {} # {} # {} \n\n'
.format(arg_cut, fs_f_max, fpart_excluded_ext + immutable, save_dir, file_path) ]), attrs=BOLD_ONLY, end='' )
raise
except IndexError:
cprint(''.join([ HIGHER_RED, '\n[! D] Please report to me which Link/scenario it print this log.\
Thanks: {} # {} # {}\n\n'
.format(arg_cut, fs_f_max, fpart_excluded_ext + immutable) ]), attrs=BOLD_ONLY, end='' )
raise
#print('final f: ' + file_path)
return file_path
'''
py_out = sys.stdin
found_err_line = False;
if not py_out:
out = ''
error_patterns = [
"ERROR: unable to open for writing: [Errno 36] File name too long: './",
"ERROR: unable to open for writing: [Errno 36] Filename too long: './", # new #2
'ERROR: unable to open for writing: [Errno 36] File name too long: "./',
'ERROR: unable to open for writing: [Errno 36] Filename too long: "./', # new #2
"ERROR: unable to download video data: [Errno 36] File name too long: './", # old
"ERROR: Unable to download video: [Errno 36] File name too long: './", # new #1
"ERROR: Unable to download video: [Errno 36] Filename too long: './", # new #2
'ERROR: unable to download video data: [Errno 36] File name too long: "./', # old
'ERROR: Unable to download video: [Errno 36] File name too long: "./' # new #1 ('Unable' become uppercase and no 'data')
'ERROR: Unable to download video: [Errno 36] Filename too long: "./' # new #2 ('Filename' no space)
]
for out in py_out:
for pattern in error_patterns:
if out.startswith(pattern):
err_filename = out.strip().replace(pattern, '', 1)
err_filename = err_filename.rstrip('"' if '"' in pattern else "'")
found_err_line = True
break
if found_err_line:
break
if not found_err_line:
# included `This video is only available for registered users` and `Unable to download webpage_ urlopen error timed out`
print('Max Path Failed [e1] ' + sanitize(out)) # will be title to download
sys.exit(1)
if __name__ == '__main__':
fs_f_max = None
if IS_WIN:
fs_f_max = WIN_MAX_PATH
else:
# 255 bytes is normaly fs max, 242 is docker max, 143 bytes is eCryptfs max
# https://github.com/moby/moby/issues/1413 , https://unix.stackexchange.com/questions/32795/
# To test eCryptfs: https://unix.stackexchange.com/questions/426950/
# If IS_WIN check here then need add \\?\\ for WIN-only
for fs_f_max_i in (255, 242, 143):
try:
with open('A'*fs_f_max_i, 'r') as f:
fs_f_max = fs_f_max_i # if got really this long A exists will come here
break
except FileNotFoundError:
# Will throws OSError first if both FileNotFoundError and OSError met
# , BUT if folder not exist then will throws FileNotFoundError first
# But current directory already there, so can use this trick
# In worst case just raise it
fs_f_max = fs_f_max_i # Normally came here in first loop
break
except OSError: # e.g. File name too long
pass #print('Try next') # Or here first if eCryptfs
#print('fs filename max len is ' + repr(fs_f_max))
# https://github.com/ytdl-org/youtube-dl/pull/25475
# https://stackoverflow.com/questions/54823541/what-do-f-bsize-and-f-frsize-in-struct-statvfs-stand-for
if fs_f_max is None: # os.statvfs ,ay not avaiable in Windows, so lower priority
#os.statvfs('.').f_frsize - 1 = 4095 # full path max bytes
fs_f_max = os.statvfs('.').f_namemax
#print(sys.argv)
#print(err_filename)
#print('llen:' + str(len(err_filename.split('-')[-3])) )
#print('llen:' + str(err_filename.split('-')[-3]) )
if (err_filename.endswith('.part')) \
and ( (len(err_filename.split('-')[-3]) in (6, 8)) and parse(err_filename.split('-')[-3]) \
or (len(err_filename.split('-')[-2]) in (6, 8)) and parse(err_filename.split('-')[-2]) ):
if ( (len(err_filename.split('-')[-3]) in (6, 8)) and parse(err_filename.split('-')[-3]) ):
date_index = -3
else:
date_index = -2
pre_immutable = 'BUFFER' + '-' + '-'.join(err_filename.split('-')[date_index:]) # BUFFER in case mkv to webm and fragment index 1 to 999999
human_fname = '-'.join(err_filename.split('-')[:date_index]) # Possible '-' in hname
#print(human_fname)
save_dir = pathlib.Path().resolve()
yt_max_output_path = get_output_file_path(-1, fs_f_max, pre_immutable, human_fname, save_dir)
print(yt_max_output_path) # important to send output to bash
#rreplace(output_path, pre_immutable, '', 1)
elif '-NA-' in err_filename:
pre_immutable = 'BUFFERpart9999' + err_filename.split('-NA-')[-1]
human_fname = '-NA-'.join(err_filename.split('-NA-')[:-1])
save_dir = pathlib.Path().resolve()
#print('imm: ' + (pre_immutable))
#print('hnmae: ' + str(human_fname))
yt_max_output_path = get_output_file_path(-1, fs_f_max, pre_immutable, human_fname, save_dir)
print(yt_max_output_path) # important to send output to bash
else:
pre_immutable = 'BUFFERpart9999' + '-'.join(err_filename.split('-')[-2:]) # e.g. BUFFERpart9999-20250208-id123.mp4.ytdl
human_fname = '-'.join(err_filename.split('-')[:-2]) # e.g. './video title'
save_dir = pathlib.Path().resolve()
yt_max_output_path = get_output_file_path(-1, fs_f_max, pre_immutable, human_fname, save_dir)
print(yt_max_output_path)
#print('Max Path Failed [e2] ' + sanitize(err_filename)) # will be title to download