Skip to content

Commit

Permalink
Merge pull request #128 from pypa/feature/refactor-build-scripts
Browse files Browse the repository at this point in the history
Refactor build scripts
  • Loading branch information
jaraco authored Mar 27, 2022
2 parents b16cf40 + 7038cf2 commit 3663538
Showing 1 changed file with 114 additions and 96 deletions.
210 changes: 114 additions & 96 deletions distutils/command/build_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
import tokenize

# check if Python is called on the first line with this expression
first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$')
shebang_pattern = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$')

# for Setuptools compatibility
first_line_re = shebang_pattern


class build_scripts(Command):
Expand All @@ -33,7 +36,6 @@ def initialize_options(self):
self.scripts = None
self.force = None
self.executable = None
self.outfiles = None

def finalize_options(self):
self.set_undefined_options('build',
Expand All @@ -51,103 +53,119 @@ def run(self):
self.copy_scripts()

def copy_scripts(self):
r"""Copy each script listed in 'self.scripts'; if it's marked as a
Python script in the Unix way (first line matches 'first_line_re',
ie. starts with "\#!" and contains "python"), then adjust the first
line to refer to the current Python interpreter as we copy.
"""
Copy each script listed in ``self.scripts``.
If a script is marked as a Python script (first line matches
'shebang_pattern', i.e. starts with ``#!`` and contains
"python"), then adjust in the copy the first line to refer to
the current Python interpreter.
"""
self.mkpath(self.build_dir)
outfiles = []
updated_files = []
for script in self.scripts:
adjust = False
script = convert_path(script)
outfile = os.path.join(self.build_dir, os.path.basename(script))
outfiles.append(outfile)

if not self.force and not newer(script, outfile):
log.debug("not copying %s (up-to-date)", script)
continue

# Always open the file, but ignore failures in dry-run mode --
# that way, we'll get accurate feedback if we can read the
# script.
try:
f = open(script, "rb")
except OSError:
if not self.dry_run:
raise
f = None
else:
encoding, lines = tokenize.detect_encoding(f.readline)
f.seek(0)
first_line = f.readline()
if not first_line:
self.warn("%s is an empty file (skipping)" % script)
continue

match = first_line_re.match(first_line)
if match:
adjust = True
post_interp = match.group(1) or b''

if adjust:
log.info("copying and adjusting %s -> %s", script,
self.build_dir)
updated_files.append(outfile)
if not self.dry_run:
if not sysconfig.python_build:
executable = self.executable
else:
executable = os.path.join(
sysconfig.get_config_var("BINDIR"),
"python%s%s" % (
sysconfig.get_config_var("VERSION"),
sysconfig.get_config_var("EXE")))
executable = os.fsencode(executable)
shebang = b"#!" + executable + post_interp + b"\n"
# Python parser starts to read a script using UTF-8 until
# it gets a #coding:xxx cookie. The shebang has to be the
# first line of a file, the #coding:xxx cookie cannot be
# written before. So the shebang has to be decodable from
# UTF-8.
try:
shebang.decode('utf-8')
except UnicodeDecodeError:
raise ValueError(
"The shebang ({!r}) is not decodable "
"from utf-8".format(shebang))
# If the script is encoded to a custom encoding (use a
# #coding:xxx cookie), the shebang has to be decodable from
# the script encoding too.
try:
shebang.decode(encoding)
except UnicodeDecodeError:
raise ValueError(
"The shebang ({!r}) is not decodable "
"from the script encoding ({})"
.format(shebang, encoding))
with open(outfile, "wb") as outf:
outf.write(shebang)
outf.writelines(f.readlines())
if f:
f.close()
else:
if f:
f.close()
updated_files.append(outfile)
self.copy_file(script, outfile)

if os.name == 'posix':
for file in outfiles:
if self.dry_run:
log.info("changing mode of %s", file)
else:
oldmode = os.stat(file)[ST_MODE] & 0o7777
newmode = (oldmode | 0o555) & 0o7777
if newmode != oldmode:
log.info("changing mode of %s from %o to %o",
file, oldmode, newmode)
os.chmod(file, newmode)
# XXX should we modify self.outfiles?
self._copy_script(script, outfiles, updated_files)

self._change_modes(outfiles)

return outfiles, updated_files

def _copy_script(self, script, outfiles, updated_files):
    """Copy one script into ``self.build_dir``, fixing its shebang if needed.

    ``script`` is converted with ``convert_path`` and its destination
    (``build_dir`` joined with the script's base name) is always appended
    to ``outfiles``.  The destination is appended to ``updated_files``
    only when the script is actually (re)copied, i.e. it is not
    up-to-date (or ``self.force`` is set) and it is not empty.

    If the first line matches ``shebang_pattern``, the copy is written
    with a shebang pointing at the current interpreter; otherwise the
    file is copied verbatim with ``self.copy_file``.

    Raises ``OSError`` if the script cannot be opened outside dry-run
    mode, and ``ValueError`` (from ``_validate_shebang``) if the new
    shebang cannot be decoded.
    """
    shebang_match = None
    script = convert_path(script)
    outfile = os.path.join(self.build_dir, os.path.basename(script))
    outfiles.append(outfile)

    if not self.force and not newer(script, outfile):
        log.debug("not copying %s (up-to-date)", script)
        return

    # Always open the file, but ignore failures in dry-run mode
    # in order to attempt to copy directly.
    try:
        f = open(script, "rb")
    except OSError:
        if not self.dry_run:
            raise
        f = None

    # Everything that reads from ``f`` lives inside this try block so the
    # handle is released on every exit path: the early return for empty
    # files, a failed shebang validation, or an error while writing.
    try:
        if f is not None:
            encoding, _ = tokenize.detect_encoding(f.readline)
            f.seek(0)
            first_line = f.readline()
            if not first_line:
                self.warn("%s is an empty file (skipping)" % script)
                return

            shebang_match = shebang_pattern.match(first_line)

        updated_files.append(outfile)
        if shebang_match:
            log.info("copying and adjusting %s -> %s", script,
                     self.build_dir)
            if not self.dry_run:
                if not sysconfig.python_build:
                    executable = self.executable
                else:
                    # Running from a Python build tree: point at the
                    # interpreter's eventual install location instead.
                    executable = os.path.join(
                        sysconfig.get_config_var("BINDIR"),
                        "python%s%s" % (
                            sysconfig.get_config_var("VERSION"),
                            sysconfig.get_config_var("EXE")))
                executable = os.fsencode(executable)
                post_interp = shebang_match.group(1) or b''
                shebang = b"#!" + executable + post_interp + b"\n"
                self._validate_shebang(shebang, encoding)
                with open(outfile, "wb") as outf:
                    outf.write(shebang)
                    # The original first line was already consumed by
                    # readline() above, so only the remainder is copied.
                    outf.writelines(f.readlines())
            return
    finally:
        if f is not None:
            f.close()

    # Not a Python script (or unreadable in dry-run mode): plain copy,
    # performed after the source handle has been closed.
    self.copy_file(script, outfile)

def _change_modes(self, outfiles):
if os.name != 'posix':
return

for file in outfiles:
self._change_mode(file)

def _change_mode(self, file):
if self.dry_run:
log.info("changing mode of %s", file)
return

oldmode = os.stat(file)[ST_MODE] & 0o7777
newmode = (oldmode | 0o555) & 0o7777
if newmode != oldmode:
log.info("changing mode of %s from %o to %o",
file, oldmode, newmode)
os.chmod(file, newmode)

@staticmethod
def _validate_shebang(shebang, encoding):
# Python parser starts to read a script using UTF-8 until
# it gets a #coding:xxx cookie. The shebang has to be the
# first line of a file, the #coding:xxx cookie cannot be
# written before. So the shebang has to be decodable from
# UTF-8.
try:
shebang.decode('utf-8')
except UnicodeDecodeError:
raise ValueError(
"The shebang ({!r}) is not decodable "
"from utf-8".format(shebang))

# If the script is encoded to a custom encoding (use a
# #coding:xxx cookie), the shebang has to be decodable from
# the script encoding too.
try:
shebang.decode(encoding)
except UnicodeDecodeError:
raise ValueError(
"The shebang ({!r}) is not decodable "
"from the script encoding ({})"
.format(shebang, encoding))

0 comments on commit 3663538

Please sign in to comment.