Merge pull request #128 from pypa/feature/refactor-build-scripts

Refactor build scripts
pypa · Mar 27, 2022 · 3663538 · 3663538
2 parents b16cf40 + 7038cf2
commit 3663538
Showing 1 changed file with 114 additions and 96 deletions.
diff --git a/distutils/command/build_scripts.py b/distutils/command/build_scripts.py
@@ -13,7 +13,10 @@
 import tokenize
 
 # check if Python is called on the first line with this expression
-first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$')
+shebang_pattern = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$')
+
+# old name, kept as an alias for Setuptools compatibility
+first_line_re = shebang_pattern
 
 
 class build_scripts(Command):
@@ -33,7 +36,6 @@ def initialize_options(self):
         self.scripts = None
         self.force = None
         self.executable = None
-        self.outfiles = None
 
     def finalize_options(self):
         self.set_undefined_options('build',
@@ -51,103 +53,119 @@ def run(self):
         self.copy_scripts()
 
     def copy_scripts(self):
-        r"""Copy each script listed in 'self.scripts'; if it's marked as a
-        Python script in the Unix way (first line matches 'first_line_re',
-        ie. starts with "\#!" and contains "python"), then adjust the first
-        line to refer to the current Python interpreter as we copy.
+        """
+        Copy each script listed in ``self.scripts``.
+
+        If a script is marked as a Python script (first line matches
+        ``shebang_pattern``, i.e. starts with ``#!`` and contains
+        "python"), then adjust the first line of the copy to refer to
+        the current Python interpreter.
         """
         self.mkpath(self.build_dir)
         outfiles = []
         updated_files = []
         for script in self.scripts:
-            adjust = False
-            script = convert_path(script)
-            outfile = os.path.join(self.build_dir, os.path.basename(script))
-            outfiles.append(outfile)
-
-            if not self.force and not newer(script, outfile):
-                log.debug("not copying %s (up-to-date)", script)
-                continue
-
-            # Always open the file, but ignore failures in dry-run mode --
-            # that way, we'll get accurate feedback if we can read the
-            # script.
-            try:
-                f = open(script, "rb")
-            except OSError:
-                if not self.dry_run:
-                    raise
-                f = None
-            else:
-                encoding, lines = tokenize.detect_encoding(f.readline)
-                f.seek(0)
-                first_line = f.readline()
-                if not first_line:
-                    self.warn("%s is an empty file (skipping)" % script)
-                    continue
-
-                match = first_line_re.match(first_line)
-                if match:
-                    adjust = True
-                    post_interp = match.group(1) or b''
-
-            if adjust:
-                log.info("copying and adjusting %s -> %s", script,
-                         self.build_dir)
-                updated_files.append(outfile)
-                if not self.dry_run:
-                    if not sysconfig.python_build:
-                        executable = self.executable
-                    else:
-                        executable = os.path.join(
-                            sysconfig.get_config_var("BINDIR"),
-                            "python%s%s" % (
-                                sysconfig.get_config_var("VERSION"),
-                                sysconfig.get_config_var("EXE")))
-                    executable = os.fsencode(executable)
-                    shebang = b"#!" + executable + post_interp + b"\n"
-                    # Python parser starts to read a script using UTF-8 until
-                    # it gets a #coding:xxx cookie. The shebang has to be the
-                    # first line of a file, the #coding:xxx cookie cannot be
-                    # written before. So the shebang has to be decodable from
-                    # UTF-8.
-                    try:
-                        shebang.decode('utf-8')
-                    except UnicodeDecodeError:
-                        raise ValueError(
-                            "The shebang ({!r}) is not decodable "
-                            "from utf-8".format(shebang))
-                    # If the script is encoded to a custom encoding (use a
-                    # #coding:xxx cookie), the shebang has to be decodable from
-                    # the script encoding too.
-                    try:
-                        shebang.decode(encoding)
-                    except UnicodeDecodeError:
-                        raise ValueError(
-                            "The shebang ({!r}) is not decodable "
-                            "from the script encoding ({})"
-                            .format(shebang, encoding))
-                    with open(outfile, "wb") as outf:
-                        outf.write(shebang)
-                        outf.writelines(f.readlines())
-                if f:
-                    f.close()
-            else:
-                if f:
-                    f.close()
-                updated_files.append(outfile)
-                self.copy_file(script, outfile)
-
-        if os.name == 'posix':
-            for file in outfiles:
-                if self.dry_run:
-                    log.info("changing mode of %s", file)
-                else:
-                    oldmode = os.stat(file)[ST_MODE] & 0o7777
-                    newmode = (oldmode | 0o555) & 0o7777
-                    if newmode != oldmode:
-                        log.info("changing mode of %s from %o to %o",
-                                 file, oldmode, newmode)
-                        os.chmod(file, newmode)
-        # XXX should we modify self.outfiles?
+            self._copy_script(script, outfiles, updated_files)
+
+        self._change_modes(outfiles)
+
         return outfiles, updated_files
+
+    def _copy_script(self, script, outfiles, updated_files):
+        shebang_match = None  # stays None if the script cannot be opened
+        script = convert_path(script)
+        outfile = os.path.join(self.build_dir, os.path.basename(script))
+        outfiles.append(outfile)  # recorded even when the copy is skipped
+
+        if not self.force and not newer(script, outfile):
+            log.debug("not copying %s (up-to-date)", script)
+            return
+
+        # Always open the file; in dry-run mode an open failure is
+        # ignored so that the plain copy_file() path is still exercised.
+        try:
+            f = open(script, "rb")
+        except OSError:
+            if not self.dry_run:
+                raise
+            f = None
+        else:
+            encoding, lines = tokenize.detect_encoding(f.readline)
+            f.seek(0)  # rewind after encoding detection
+            first_line = f.readline()
+            if not first_line:
+                self.warn("%s is an empty file (skipping)" % script)
+                return  # NOTE(review): f is left open on this path
+
+            shebang_match = shebang_pattern.match(first_line)
+
+        updated_files.append(outfile)
+        if shebang_match:
+            log.info("copying and adjusting %s -> %s", script,
+                     self.build_dir)
+            if not self.dry_run:
+                if not sysconfig.python_build:
+                    executable = self.executable
+                else:
+                    executable = os.path.join(
+                        sysconfig.get_config_var("BINDIR"),
+                        "python%s%s" % (
+                            sysconfig.get_config_var("VERSION"),
+                            sysconfig.get_config_var("EXE")))
+                executable = os.fsencode(executable)
+                post_interp = shebang_match.group(1) or b''  # rest of line
+                shebang = b"#!" + executable + post_interp + b"\n"
+                self._validate_shebang(shebang, encoding)
+                with open(outfile, "wb") as outf:
+                    outf.write(shebang)
+                    outf.writelines(f.readlines())  # body after the shebang
+            if f:
+                f.close()
+        else:
+            if f:
+                f.close()
+            self.copy_file(script, outfile)
+
+    def _change_modes(self, outfiles):
+        if os.name != 'posix':
+            return  # modes are only adjusted when os.name is 'posix'
+
+        for file in outfiles:
+            self._change_mode(file)
+
+    def _change_mode(self, file):
+        if self.dry_run:
+            log.info("changing mode of %s", file)
+            return  # dry run: log only, never stat or chmod
+
+        oldmode = os.stat(file)[ST_MODE] & 0o7777  # low 12 mode bits
+        newmode = (oldmode | 0o555) & 0o7777  # add read/execute for all
+        if newmode != oldmode:
+            log.info("changing mode of %s from %o to %o",
+                     file, oldmode, newmode)
+            os.chmod(file, newmode)
+
+    @staticmethod
+    def _validate_shebang(shebang, encoding):
+        # The Python parser reads a script as UTF-8 until it finds a
+        # #coding:xxx cookie. The shebang must be the first line of
+        # the file, so the cookie cannot precede it; the shebang
+        # itself therefore has to be decodable as UTF-8, otherwise
+        # ValueError is raised.
+        try:
+            shebang.decode('utf-8')
+        except UnicodeDecodeError:
+            raise ValueError(
+                "The shebang ({!r}) is not decodable "
+                "from utf-8".format(shebang))
+
+        # If the script declares a custom encoding (via a
+        # #coding:xxx cookie), the shebang has to be decodable in
+        # that script encoding too.
+        try:
+            shebang.decode(encoding)
+        except UnicodeDecodeError:
+            raise ValueError(
+                "The shebang ({!r}) is not decodable "
+                "from the script encoding ({})"
+                .format(shebang, encoding))