Skip to content

Commit

Permalink
add 'restrict-filenames' option (#348)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Jul 23, 2019
1 parent 60cf403 commit b1bea8a
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 44 deletions.
18 changes: 18 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,24 @@ Description Directory path used as the base for all download destinations.
=========== =====


extractor.*.restrict-filenames
------------------------------
=========== =====
Type ``string``
Default ``"auto"``
Example ``"/!? ()[]{}"``
Description Characters to replace with underscores (``_``) when generating
directory and file names.

Special values:

* ``"auto"``: Use characters from ``"unix"`` or ``"windows"``
depending on the local operating system
* ``"unix"``: ``"/"``
* ``"windows"``: ``"<>:\"\\|/?*"``
=========== =====


extractor.*.skip
----------------
=========== =====
Expand Down
1 change: 1 addition & 0 deletions docs/gallery-dl.conf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"skip": true,
"sleep": 0,
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
"restrict-filenames": "auto",

"artstation":
{
Expand Down
22 changes: 0 additions & 22 deletions gallery_dl/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,6 @@ def nameext_from_url(url, data=None):
return data


def clean_path_windows(path):
    """Replace characters that Windows forbids in a path-segment with '_'

    Any argument 're.sub()' rejects with a TypeError (i.e. something that
    is not a string) yields an empty string instead of raising.
    """
    try:
        cleaned = re.sub(r'[<>:"\\/|?*]', "_", path)
    except TypeError:
        cleaned = ""
    return cleaned


def clean_path_posix(path):
    """Replace the Posix path separator ('/') in a path-segment with '_'

    Arguments without a usable 'replace' attribute (AttributeError)
    yield an empty string instead of raising.
    """
    try:
        cleaned = path.replace("/", "_")
    except AttributeError:
        cleaned = ""
    return cleaned


def extract(txt, begin, end, pos=0):
"""Extract the text between 'begin' and 'end' from 'txt'
Expand Down Expand Up @@ -266,12 +250,6 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"):
return date_string


# Select the path-segment cleaner for the local platform:
# the Windows variant when os.name is "nt", the Posix variant otherwise.
clean_path = clean_path_windows if os.name == "nt" else clean_path_posix


urljoin = urllib.parse.urljoin

quote = urllib.parse.quote
Expand Down
25 changes: 23 additions & 2 deletions gallery_dl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,27 @@ def __init__(self, extractor):
if os.altsep and os.altsep in self.basedirectory:
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)

restrict = extractor.config("restrict-filenames", "auto")
if restrict == "auto":
restrict = "<>:\"\\/|?*" if os.name == "nt" else "/"
elif restrict == "unix":
restrict = "/"
elif restrict == "windows":
restrict = "<>:\"\\/|?*"
self.clean_path = self._build_cleanfunc(restrict)

@staticmethod
def _build_cleanfunc(repl):
    """Build a function replacing each character of 'repl' with '_'

    A falsy 'repl' produces a function that returns its argument as is;
    a single character uses str.replace(); anything longer uses a
    regular expression character class compiled once up front.
    """
    if not repl:
        # nothing to restrict
        def clean(value):
            return value
        return clean

    if len(repl) == 1:
        def clean(value, char=repl):
            return value.replace(char, "_")
        return clean

    pattern = re.compile("[" + re.escape(repl) + "]")

    # bind the compiled pattern's 'sub' as a default argument
    def clean(value, substitute=pattern.sub):
        return substitute("_", value)
    return clean

def open(self, mode="wb"):
"""Open file and return a corresponding file object"""
return open(self.temppath, mode)
Expand All @@ -551,7 +572,7 @@ def set_directory(self, keywords):
"""Build directory path and create it if necessary"""
try:
segments = [
text.clean_path(
self.clean_path(
Formatter(segment, self.kwdefault)
.format_map(keywords).strip())
for segment in self.directory_fmt
Expand Down Expand Up @@ -597,7 +618,7 @@ def fix_extension(self, _=None):
def build_path(self):
"""Use filename-keywords and directory to build a full path"""
try:
self.filename = text.clean_path(
self.filename = self.clean_path(
self.formatter.format_map(self.keywords))
except Exception as exc:
raise exception.FormatError(exc, "filename")
Expand Down
20 changes: 0 additions & 20 deletions test/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,26 +139,6 @@ def test_nameext_from_url(self, f=text.nameext_from_url):
for value in INVALID:
self.assertEqual(f(value), empty)

def test_clean_path_windows(self, f=text.clean_path_windows):
    # (input, expected result) pairs for valid string arguments
    expectations = (
        ("", ""),
        ("foo", "foo"),
        ("foo/bar", "foo_bar"),
        ("foo<>:\"\\/|?*bar", "foo_________bar"),
    )
    for value, cleaned in expectations:
        self.assertEqual(f(value), cleaned)

    # invalid arguments
    for value in INVALID:
        self.assertEqual(f(value), "")

def test_clean_path_posix(self, f=text.clean_path_posix):
    # (input, expected result) pairs for valid string arguments;
    # only the forward slash is replaced
    expectations = (
        ("", ""),
        ("foo", "foo"),
        ("foo/bar", "foo_bar"),
        ("foo<>:\"\\/|?*bar", "foo<>:\"\\_|?*bar"),
    )
    for value, cleaned in expectations:
        self.assertEqual(f(value), cleaned)

    # invalid arguments
    for value in INVALID:
        self.assertEqual(f(value), "")

def test_extract(self, f=text.extract):
txt = "<a><b>"
self.assertEqual(f(txt, "<", ">"), ("a" , 3))
Expand Down

0 comments on commit b1bea8a

Please sign in to comment.