Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Python's standard filesystem encoding for command-line arguments #4507

Merged
merged 4 commits into from
Oct 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 5 additions & 28 deletions beets/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import os
import sys
import errno
import locale
import re
import tempfile
import shutil
Expand Down Expand Up @@ -332,12 +331,7 @@ def arg_encoding():
"""Get the encoding for command-line arguments (and other OS
locale-sensitive strings).
"""
try:
return locale.getdefaultlocale()[1] or 'utf-8'
except ValueError:
# Invalid locale environment variable setting. To avoid
# failing entirely for no good reason, assume UTF-8.
return 'utf-8'
return sys.getfilesystemencoding()


def _fsencoding():
Expand Down Expand Up @@ -837,13 +831,14 @@ def cpu_count():


def convert_command_args(args):
"""Convert command arguments to bytestrings on Python 2 and
surrogate-escaped strings on Python 3."""
"""Convert command arguments, which may either be `bytes` or `str`
objects, to uniformly surrogate-escaped strings.
"""
assert isinstance(args, list)

def convert(arg):
if isinstance(arg, bytes):
arg = arg.decode(arg_encoding(), 'surrogateescape')
return os.fsdecode(arg)
return arg

return [convert(a) for a in args]
Expand Down Expand Up @@ -1092,21 +1087,3 @@ def wrapper(self):
return value

return wrapper


def decode_commandline_path(path):
"""Prepare a path for substitution into commandline template.

On Python 3, we need to construct the subprocess commands to invoke as a
Unicode string. On Unix, this is a little unfortunate---the OS is
expecting bytes---so we use surrogate escaping and decode with the
argument encoding, which is the same encoding that will then be
*reversed* to recover the same bytes before invoking the OS. On
Windows, we want to preserve the Unicode filename "as is."
"""
# On Python 3, the template is a Unicode string, which only supports
# substitution of Unicode variables.
if platform.system() == 'Windows':
return path.decode(_fsencoding())
else:
return path.decode(arg_encoding(), 'surrogateescape')
6 changes: 3 additions & 3 deletions beetsplug/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

"""Converts tracks or albums to external directory
"""
from beets.util import par_map, decode_commandline_path, arg_encoding
from beets.util import par_map, arg_encoding

import os
import threading
Expand Down Expand Up @@ -214,8 +214,8 @@ def encode(self, command, source, dest, pretend=False):
self._log.info('Encoding {0}', util.displayable_path(source))

command = command.decode(arg_encoding(), 'surrogateescape')
source = decode_commandline_path(source)
dest = decode_commandline_path(dest)
source = os.fsdecode(source)
dest = os.fsdecode(dest)

# Substitute $source and $dest in the argument list.
args = shlex.split(command)
Expand Down
5 changes: 3 additions & 2 deletions beetsplug/duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
"""

import shlex
import os

from beets.plugins import BeetsPlugin
from beets.ui import decargs, print_, Subcommand, UserError
from beets.util import command_output, displayable_path, subprocess, \
bytestring_path, MoveOperation, decode_commandline_path
bytestring_path, MoveOperation
from beets.library import Item, Album


Expand Down Expand Up @@ -200,7 +201,7 @@ def _checksum(self, item, prog):
output as flexattr on a key that is the name of the program, and
return the key, checksum tuple.
"""
args = [p.format(file=decode_commandline_path(item.path))
args = [p.format(file=os.fsdecode(item.path))
for p in shlex.split(prog)]
key = args[0]
checksum = getattr(item, key, False)
Expand Down
4 changes: 4 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ Bug fixes:
* :doc:`/plugins/replaygain`: Avoid a crash when errors occur in the analysis
backend.
:bug:`4506`
* We now use Python's defaults for command-line argument encoding, which
should reduce the chance for errors and "file not found" failures when
invoking other command-line tools, especially on Windows.
:bug:`4507`
* We now respect the Spotify API's rate limiting, which avoids crashing when the API reports code 429 (too many requests).
:bug:`4370`
* Fix implicit paths OR queries (e.g. ``beet list /path/ , /other-path/``)
Expand Down
1 change: 1 addition & 0 deletions test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def test_sanitize_empty_component(self):
])
self.assertEqual(p, 'foo/_/bar')

@unittest.skipIf(sys.platform == 'win32', 'win32')
def test_convert_command_args_keeps_undecodeable_bytes(self):
arg = b'\x82' # non-ascii bytes
cmd_args = util.convert_command_args([arg])
Expand Down