From 1c65efbe73513028510fe3fb599777a721f8d80c Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 15 Dec 2023 08:48:59 -0500 Subject: [PATCH 01/16] Show "not from cwd" test is broken for shell=True This adds a test_it_executes_git_not_from_cwd case for shell=True. (This case also gives the command as a string, so the test need not be further special-cased for non-Windows systems, where argument lists aren't accepted with shell=True.) The test did not attempt to cover the shell=True case before, because I had erroneously assumed it worked similarity. It is actually very different, because when a shell is used, both the shell and the command the shell runs must be found and executed, and because the process creation GitPython performs is that of the shell process, with the state of the shell process being what is relevant to how the path search is done for the git (or other) command. The code change here does not itself demonstrate that the test is broken for shell=True, because that case passes. However, manually undoing the fix in cmd.py for CVE-2023-40590, which as expected causes the preexisting (implicitly shell=False case) to fail, does *not* cause the new shell=True case to fail. That case passes! That passing result in the absence of a fix for CVE-2023-40590 is erroneous, because the cmd.exe shell does search the CWD first when nothing has been done to prevent it. --- test/test_git.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/test_git.py b/test/test_git.py index 6d0ae82d0..beb2767f7 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -134,7 +134,13 @@ def test_it_logs_istream_summary_for_stdin(self, case): def test_it_executes_git_and_returns_result(self): self.assertRegex(self.git.execute(["git", "version"]), r"^git version [\d\.]{2}.*$") - def test_it_executes_git_not_from_cwd(self): + @ddt.data( + (["git", "version"], False), + ("git version", True), + ) + def test_it_executes_git_not_from_cwd(self, case): + command, shell = case + with TemporaryDirectory() as tmpdir: if os.name == "nt": # Copy an actual binary executable that is not git. @@ -149,7 +155,9 @@ def test_it_executes_git_not_from_cwd(self): os.chmod(impostor_path, 0o755) with cwd(tmpdir): - self.assertRegex(self.git.execute(["git", "version"]), r"^git version\b") + output = self.git.execute(command, shell=shell) + + self.assertRegex(output, r"^git version\b") @skipUnless( os.name == "nt", From 2b47933fba1e9266edb1bb71163768288bbfd409 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 15 Dec 2023 08:54:12 -0500 Subject: [PATCH 02/16] Correct the "not from cwd" test and add more cases This shows that CVE-2023-40590 is only partially patched. This commit does not fix the vulnerbility, only the test. The problem is that, when shell=True, the environment of the shell subprocess, instead of the GitPython process, determines whether searching for the "git" command will use the current directory. Currently NoDefaultCurrentDirectoryInExePath is set only in the current process, and this is done after the environment for the subprocess (env) is computed. So when git is an indirect subprocess due to shell=True, Windows still checks a CWD when looking it up. (Note that this should not be a problem for indirect subprocesses started by git itself. When a standard git command is implemented as an external executable, when git runs a custom command, and when one git command delegates some of its work to another -- e.g. "git clone" running git-remote-https -- Git for Windows already does not search the current directory. Custom git commands that start their own git subprocesses could have an analogous path search bug, but this would be separate from the bug in GitPython.) This is an exploitable vulnerability in GitPython. Although shell=True is rarer and inherently less secure than the default of shell=False, it still ought to avoid automatically running an executable that may exist only due to having been cloned as part of an untrusted repository. In addition, historically programs on Windows had sometimes used shell=True as a workaround for console windows being displayed for subprocesses, and some such code may still be in use. Furthermore, even when GitPython's current working directory is outside the repository being worked on, the Git object in a Repo instance's "git" attribute holds the repository directory as its "_working_dir" attribute, which Git.execute consults to determine the value of "cwd" to pass to subprocess.Popen. When the created direct subprocess is really a shell, this is the CWD where git.exe is looked for before searching PATH directories. This is also why previous, more limited testing (including accompanying manual testing with shell=True) didn't discover the bug. Even when modified to test with shell=True, the old test had the "impostor" git.exe in the GitPython process's own current directory (which was changed to a temporary directory for the test, where the "impostor" was created, but this was separate from the working tree of the self.git repository). This was effective for the shell=False case, but not the shell=True case where the impostor would be found and run in the repository directory even when it differs from the GitPython process's CWD. --- test/lib/helper.py | 4 ++-- test/test_git.py | 51 +++++++++++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/test/lib/helper.py b/test/lib/helper.py index d662b632d..bfc8cdc43 100644 --- a/test/lib/helper.py +++ b/test/lib/helper.py @@ -88,11 +88,11 @@ def with_rw_directory(func): test succeeds, but leave it otherwise to aid additional debugging.""" @wraps(func) - def wrapper(self): + def wrapper(self, *args, **kwargs): path = tempfile.mkdtemp(prefix=func.__name__) keep = False try: - return func(self, path) + return func(self, path, *args, **kwargs) except Exception: log.info( "Test %s.%s failed, output is at %r\n", diff --git a/test/test_git.py b/test/test_git.py index beb2767f7..4421ab0ee 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -3,6 +3,7 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +import contextlib import gc import inspect import logging @@ -12,7 +13,7 @@ import shutil import subprocess import sys -from tempfile import TemporaryDirectory, TemporaryFile +from tempfile import TemporaryFile from unittest import skipUnless if sys.version_info >= (3, 8): @@ -135,27 +136,35 @@ def test_it_executes_git_and_returns_result(self): self.assertRegex(self.git.execute(["git", "version"]), r"^git version [\d\.]{2}.*$") @ddt.data( - (["git", "version"], False), - ("git version", True), + (False, False, ["git", "version"]), + (False, True, "git version"), + (True, False, ["git", "version"]), + (True, True, "git version"), ) - def test_it_executes_git_not_from_cwd(self, case): - command, shell = case - - with TemporaryDirectory() as tmpdir: - if os.name == "nt": - # Copy an actual binary executable that is not git. - other_exe_path = os.path.join(os.getenv("WINDIR"), "system32", "hostname.exe") - impostor_path = os.path.join(tmpdir, "git.exe") - shutil.copy(other_exe_path, impostor_path) - else: - # Create a shell script that doesn't do anything. - impostor_path = os.path.join(tmpdir, "git") - with open(impostor_path, mode="w", encoding="utf-8") as file: - print("#!/bin/sh", file=file) - os.chmod(impostor_path, 0o755) - - with cwd(tmpdir): - output = self.git.execute(command, shell=shell) + @with_rw_directory + def test_it_executes_git_not_from_cwd(self, rw_dir, case): + chdir_to_repo, shell, command = case + + repo = Repo.init(rw_dir) + + if os.name == "nt": + # Copy an actual binary executable that is not git. (On Windows, running + # "hostname" only displays the hostname, it never tries to change it.) + other_exe_path = os.path.join(os.getenv("WINDIR"), "system32", "hostname.exe") + impostor_path = os.path.join(rw_dir, "git.exe") + shutil.copy(other_exe_path, impostor_path) + else: + # Create a shell script that doesn't do anything. + impostor_path = os.path.join(rw_dir, "git") + with open(impostor_path, mode="w", encoding="utf-8") as file: + print("#!/bin/sh", file=file) + os.chmod(impostor_path, 0o755) + + with cwd(rw_dir) if chdir_to_repo else contextlib.nullcontext(): + # Run the command without raising an exception on failure, as the exception + # message is currently misleading when the command is a string rather than a + # sequence of strings (it really runs "git", but then wrongly reports "g"). + output = repo.git.execute(command, with_exceptions=False, shell=shell) self.assertRegex(output, r"^git version\b") From c1f6c17182d59b9098aeac272dba79d912c5fa42 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 15 Dec 2023 11:22:18 -0500 Subject: [PATCH 03/16] Use SystemRoot instead of WINDIR, to fix tox In most cases they will be the same, but WINDIR may be absent (or have a different value?) in rare cases. The practical reason it matters in GitPython's tests is that tox automatically passes SystemRoot through to environments on Windows. --- test/test_git.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_git.py b/test/test_git.py index 4421ab0ee..7f42821a8 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -150,7 +150,7 @@ def test_it_executes_git_not_from_cwd(self, rw_dir, case): if os.name == "nt": # Copy an actual binary executable that is not git. (On Windows, running # "hostname" only displays the hostname, it never tries to change it.) - other_exe_path = os.path.join(os.getenv("WINDIR"), "system32", "hostname.exe") + other_exe_path = os.path.join(os.environ["SystemRoot"], "system32", "hostname.exe") impostor_path = os.path.join(rw_dir, "git.exe") shutil.copy(other_exe_path, impostor_path) else: From 06bd2c7ee2b9a4169c02ba6f4e3bc34ab674af73 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 15 Dec 2023 11:47:45 -0500 Subject: [PATCH 04/16] Omit CWD in executable search even when shell=True --- git/cmd.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/git/cmd.py b/git/cmd.py index 9c94748c5..b3a9959b7 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -985,23 +985,31 @@ def execute( if inline_env is not None: env.update(inline_env) + if shell is None: + shell = self.USE_SHELL + + cmd_not_found_exception = FileNotFoundError + maybe_patch_caller_env = contextlib.nullcontext() + if os.name == "nt": - cmd_not_found_exception = OSError if kill_after_timeout is not None: raise GitCommandError( redacted_command, '"kill_after_timeout" feature is not supported on Windows.', ) - # Only search PATH, not CWD. This must be in the *caller* environment. The "1" can be any value. - maybe_patch_caller_env = patch_env("NoDefaultCurrentDirectoryInExePath", "1") - else: - cmd_not_found_exception = FileNotFoundError - maybe_patch_caller_env = contextlib.nullcontext() + + cmd_not_found_exception = OSError + + # Search PATH, but do not search CWD. The "1" can be any value. + if shell: + # If the direct subprocess is a shell, this must go in its environment. + env["NoDefaultCurrentDirectoryInExePath"] = "1" + else: + # If we're not using a shell, the variable goes in our own environment. + maybe_patch_caller_env = patch_env("NoDefaultCurrentDirectoryInExePath", "1") # END handle stdout_sink = PIPE if with_stdout else getattr(subprocess, "DEVNULL", None) or open(os.devnull, "wb") - if shell is None: - shell = self.USE_SHELL log.debug( "Popen(%s, cwd=%s, stdin=%s, shell=%s, universal_newlines=%s)", redacted_command, From 7da9c3b45a19302b346474ea3fdcda1594c33b08 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 15 Dec 2023 12:01:43 -0500 Subject: [PATCH 05/16] Refactor "not from cwd" test for readability By using pathlib.Path instead of os.path functions. --- test/test_git.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_git.py b/test/test_git.py index 7f42821a8..f4f716660 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -9,6 +9,7 @@ import logging import os import os.path as osp +from pathlib import Path import re import shutil import subprocess @@ -150,14 +151,13 @@ def test_it_executes_git_not_from_cwd(self, rw_dir, case): if os.name == "nt": # Copy an actual binary executable that is not git. (On Windows, running # "hostname" only displays the hostname, it never tries to change it.) - other_exe_path = os.path.join(os.environ["SystemRoot"], "system32", "hostname.exe") - impostor_path = os.path.join(rw_dir, "git.exe") + other_exe_path = Path(os.environ["SystemRoot"], "system32", "hostname.exe") + impostor_path = Path(rw_dir, "git.exe") shutil.copy(other_exe_path, impostor_path) else: # Create a shell script that doesn't do anything. - impostor_path = os.path.join(rw_dir, "git") - with open(impostor_path, mode="w", encoding="utf-8") as file: - print("#!/bin/sh", file=file) + impostor_path = Path(rw_dir, "git") + impostor_path.write_text("#!/bin/sh\n", encoding="utf-8") os.chmod(impostor_path, 0o755) with cwd(rw_dir) if chdir_to_repo else contextlib.nullcontext(): From 865c6e8315e8700737981e8fe43a1ac62eb1423a Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 15 Dec 2023 14:13:18 -0500 Subject: [PATCH 06/16] Further expand "not from cwd" test re: cmd.exe On Python versions for which python/cpython#101283 is not patched, using Popen with shell=True can find cmd.exe in the current directory on Windows, in the rare case that the ComSpec environment variable is not defined. This is not necessarily worth addressing, because it is a bug in CPython rahter than GitPython, because that bug has been patched, and because it is very rare that ComSpec is undefined. However: - Changing the code to avoid it would also make that code simpler. - Patched versions of Python <=3.9 don't have python.org builds. This commit just expands the test to add cases where a repository also has a file cmd.exe and where ComSpec is undefined, showing that this case is not covered. --- test/test_git.py | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/test/test_git.py b/test/test_git.py index f4f716660..e80ad37ef 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -29,6 +29,21 @@ from test.lib import TestBase, fixture_path, with_rw_directory +@contextlib.contextmanager +def _patch_out_env(name): + try: + old_value = os.environ[name] + except KeyError: + old_value = None + else: + del os.environ[name] + try: + yield + finally: + if old_value is not None: + os.environ[name] = old_value + + @ddt.ddt class TestGit(TestBase): @classmethod @@ -137,14 +152,17 @@ def test_it_executes_git_and_returns_result(self): self.assertRegex(self.git.execute(["git", "version"]), r"^git version [\d\.]{2}.*$") @ddt.data( - (False, False, ["git", "version"]), - (False, True, "git version"), - (True, False, ["git", "version"]), - (True, True, "git version"), + # chdir_to_repo, shell, command, use_shell_impostor + (False, False, ["git", "version"], False), + (False, True, "git version", False), + (False, True, "git version", True), + (True, False, ["git", "version"], False), + (True, True, "git version", False), + (True, True, "git version", True), ) @with_rw_directory def test_it_executes_git_not_from_cwd(self, rw_dir, case): - chdir_to_repo, shell, command = case + chdir_to_repo, shell, command, use_shell_impostor = case repo = Repo.init(rw_dir) @@ -160,7 +178,16 @@ def test_it_executes_git_not_from_cwd(self, rw_dir, case): impostor_path.write_text("#!/bin/sh\n", encoding="utf-8") os.chmod(impostor_path, 0o755) - with cwd(rw_dir) if chdir_to_repo else contextlib.nullcontext(): + if use_shell_impostor: + shell_name = "cmd.exe" if os.name == "nt" else "sh" + shutil.copy(impostor_path, Path(rw_dir, shell_name)) + + with contextlib.ExitStack() as stack: + if chdir_to_repo: + stack.enter_context(cwd(rw_dir)) + if use_shell_impostor: + stack.enter_context(_patch_out_env("ComSpec")) + # Run the command without raising an exception on failure, as the exception # message is currently misleading when the command is a string rather than a # sequence of strings (it really runs "git", but then wrongly reports "g"). From d2506c73b6ad59bff1f2f58c4edd15f717012591 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 15 Dec 2023 15:56:58 -0500 Subject: [PATCH 07/16] Make Git.execute a bit simpler and very slightly more robust This covers the rare unpatched python/cpython#101283 case the previous commit added tests for, that only applies in the unusual situation that the ComSpec environment variable is unset and an old build (but this includes downloadable builds and current actions/setup-python builds) of Python <=3.9 for Windows is in use. The main benefit of this change is really to slightly simplify the code under test. (It might even be justified to remove the use_shell_impostor=True test cases at some point.) --- git/cmd.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/git/cmd.py b/git/cmd.py index b3a9959b7..3d26dbad2 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -988,25 +988,22 @@ def execute( if shell is None: shell = self.USE_SHELL - cmd_not_found_exception = FileNotFoundError - maybe_patch_caller_env = contextlib.nullcontext() - if os.name == "nt": + cmd_not_found_exception = OSError if kill_after_timeout is not None: raise GitCommandError( redacted_command, '"kill_after_timeout" feature is not supported on Windows.', ) - - cmd_not_found_exception = OSError - - # Search PATH, but do not search CWD. The "1" can be any value. + # Search PATH but not CWD. The "1" can be any value. We'll patch just before + # the Popen call and unpatch just after, or we get a worse race condition. + maybe_patch_caller_env = patch_env("NoDefaultCurrentDirectoryInExePath", "1") if shell: - # If the direct subprocess is a shell, this must go in its environment. + # Modify the direct shell subprocess's own search behavior accordingly. env["NoDefaultCurrentDirectoryInExePath"] = "1" - else: - # If we're not using a shell, the variable goes in our own environment. - maybe_patch_caller_env = patch_env("NoDefaultCurrentDirectoryInExePath", "1") + else: + cmd_not_found_exception = FileNotFoundError + maybe_patch_caller_env = contextlib.nullcontext() # END handle stdout_sink = PIPE if with_stdout else getattr(subprocess, "DEVNULL", None) or open(os.devnull, "wb") From 61b4ddacc509b67ec809e15d74de1b60a2a28282 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 15 Dec 2023 18:35:01 -0500 Subject: [PATCH 08/16] Start on test_hook_uses_shell_not_from_cwd This shows that run_commit_hook is vulnerable to an untrusted search path bug on Windows, when running script hooks: bash.exe is run without setting NoDefaultCurrentDirectoryInExePath or otherwise excluding the current directory from the path search. The new test uses a non-bare repo, even though the surrounding tests use bare repos. Although the test also works if the repo is initialized with `Repo.init(rw_dir, bare=True)`, using a bare repo would obscure how the bug this test reveals would typically be exploited, where a command that uses a hook is run after a malicious bash.exe is checked out into the working tree from an untrusted branch. Running hooks that are themselves provided by an untrusted repository or branch is of course never safe. If an attacker can deceive a user into doing that, then this vulnerability is not needed. Instead, an attack that leverages this untrusted search path vulnerability would most likely be of roughly this form: 1. The user clones a trusted repository and installs hooks. 2. A malicious party offers a contribution to the project. 3. The user checks out the malicious party's untrusted branch. 4. The user performs an action that runs a hook. The hook the user runs is still trusted, but it runs with the malicious bash.exe found in the current directory (which is the working tree or perhaps some subdirectory of it). The test added in this commit should, if possible, be improved to be able to run and detect the bug (or its absence) even when bash is absent from the Windows system and, preferably, also even when the WSL bash.exe is present but no WSL distribution is installed. --- test/test_index.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/test/test_index.py b/test/test_index.py index 50d941e83..eb676add8 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -3,16 +3,19 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +import contextlib from io import BytesIO import logging import os import os.path as osp from pathlib import Path import re +import shutil from stat import S_ISLNK, ST_MODE import subprocess import tempfile +import ddt import pytest from sumtypes import constructor, sumtype @@ -36,7 +39,7 @@ from git.index.typ import BaseIndexEntry, IndexEntry from git.index.util import TemporaryFileSwap from git.objects import Blob -from git.util import Actor, hex_to_bin, rmtree +from git.util import Actor, cwd, hex_to_bin, rmtree from gitdb.base import IStream from test.lib import TestBase, fixture, fixture_path, with_rw_directory, with_rw_repo @@ -172,6 +175,7 @@ def _make_hook(git_dir, name, content, make_exec=True): return hp +@ddt.ddt class TestIndex(TestBase): def __init__(self, *args): super().__init__(*args) @@ -1012,6 +1016,37 @@ def test_run_commit_hook(self, rw_repo): output = Path(rw_repo.git_dir, "output.txt").read_text(encoding="utf-8") self.assertEqual(output, "ran fake hook\n") + # FIXME: Figure out a way to make this test also work with Absent and WslNoDistro. + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.WslNoDistro, + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", + raises=HookExecutionError, + ) + @ddt.data((False,), (True,)) + @with_rw_directory + def test_hook_uses_shell_not_from_cwd(self, rw_dir, case): + (chdir_to_repo,) = case + + repo = Repo.init(rw_dir) + _make_hook(repo.git_dir, "fake-hook", "echo 'ran fake hook' >output.txt") + + if os.name == "nt": + # Copy an actual binary that is not bash. + other_exe_path = Path(os.environ["SystemRoot"], "system32", "hostname.exe") + impostor_path = Path(rw_dir, "bash.exe") + shutil.copy(other_exe_path, impostor_path) + else: + # Create a shell script that doesn't do anything. + impostor_path = Path(rw_dir, "sh") + impostor_path.write_text("#!/bin/sh\n", encoding="utf-8") + os.chmod(impostor_path, 0o755) + + with cwd(rw_dir) if chdir_to_repo else contextlib.nullcontext(): + run_commit_hook("fake-hook", repo.index) + + output = Path(rw_dir, "output.txt").read_text(encoding="utf-8") + self.assertEqual(output, "ran fake hook\n") + @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, reason="Can't run a hook on Windows without bash.exe.", From 66ff4c177accfb4f21d3eb476381d248d99fd8b5 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sun, 17 Dec 2023 06:08:57 -0500 Subject: [PATCH 09/16] Omit CWD in search for bash.exe to run hooks on Windows This uses the same NoDefaultCurrentDirectoryInExePath technique for the Popen call in git.index.fun.run_commit_hook on Windows as is already used in git.cmd.Git.execute. The code is simpler in run_commit_hook because a shell is never used to run bash.exe. (bash.exe is itself a shell, but we never run it *via* a shell by passing shell=True to Popen.) Nonetheless, it may make sense to extract out a helper function both can call. This commit does not do that, so there is some code duplication. --- git/index/fun.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/git/index/fun.py b/git/index/fun.py index 402f85d2b..303141bbe 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -3,6 +3,7 @@ """Standalone functions to accompany the index implementation and make it more versatile.""" +import contextlib from io import BytesIO import os import os.path as osp @@ -26,7 +27,7 @@ traverse_trees_recursive, tree_to_stream, ) -from git.util import IndexFileSHA1Writer, finalize_process +from git.util import IndexFileSHA1Writer, finalize_process, patch_env from gitdb.base import IStream from gitdb.typ import str_tree_type @@ -90,6 +91,10 @@ def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None: env = os.environ.copy() env["GIT_INDEX_FILE"] = safe_decode(str(index.path)) env["GIT_EDITOR"] = ":" + if os.name == "nt": + maybe_patch_caller_env = patch_env("NoDefaultCurrentDirectoryInExePath", "1") + else: + maybe_patch_caller_env = contextlib.nullcontext() cmd = [hp] try: if os.name == "nt" and not _has_file_extension(hp): @@ -98,14 +103,15 @@ def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None: relative_hp = Path(hp).relative_to(index.repo.working_dir).as_posix() cmd = ["bash.exe", relative_hp] - process = subprocess.Popen( - cmd + list(args), - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - cwd=index.repo.working_dir, - creationflags=PROC_CREATIONFLAGS, - ) + with maybe_patch_caller_env: + process = subprocess.Popen( + cmd + list(args), + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=index.repo.working_dir, + creationflags=PROC_CREATIONFLAGS, + ) except Exception as ex: raise HookExecutionError(hp, ex) from ex else: From 7751436b94d96ce0978b301681b851edd6efed63 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 18 Dec 2023 06:17:00 -0500 Subject: [PATCH 10/16] Extract venv management from test_installation - Create and use a test.lib.helper.VirtualEnvironment class. - Import and use venv module instead of running "python -m venv". These changes make no significant difference in speed or clarity for the existing test in test_installation. The reason for this change is instead to support the use of new per-test virtual environments in at least one other test. --- test/lib/helper.py | 41 ++++++++++++++++++++++++++++++++++++++ test/test_installation.py | 42 +++++++++++++++++++-------------------- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/test/lib/helper.py b/test/lib/helper.py index bfc8cdc43..2fb8990dd 100644 --- a/test/lib/helper.py +++ b/test/lib/helper.py @@ -14,6 +14,7 @@ import textwrap import time import unittest +import venv import gitdb @@ -36,6 +37,7 @@ "with_rw_repo", "with_rw_and_rw_remote_repo", "TestBase", + "VirtualEnvironment", "TestCase", "SkipTest", "skipIf", @@ -390,3 +392,42 @@ def _make_file(self, rela_path, data, repo=None): with open(abs_path, "w") as fp: fp.write(data) return abs_path + + +class VirtualEnvironment: + """A newly created Python virtual environment for use in a test.""" + + __slots__ = ("_env_dir",) + + def __init__(self, env_dir, *, with_pip): + self._env_dir = env_dir + venv.create(env_dir, symlinks=(os.name != "nt"), with_pip=with_pip) + + @property + def env_dir(self): + """The top-level directory of the environment.""" + return self._env_dir + + @property + def python(self): + """Path to the Python executable in the environment.""" + return self._executable("python") + + @property + def pip(self): + """Path to the pip executable in the environment, or RuntimeError if absent.""" + return self._executable("pip") + + @property + def sources(self): + """Path to a src directory in the environment, which may not exist yet.""" + return os.path.join(self.env_dir, "src") + + def _executable(self, basename): + if os.name == "nt": + path = osp.join(self.env_dir, "Scripts", basename + ".exe") + else: + path = osp.join(self.env_dir, "bin", basename) + if osp.isfile(path) or osp.islink(path): + return path + raise RuntimeError(f"no regular file or symlink {path!r}") diff --git a/test/test_installation.py b/test/test_installation.py index 0a200415b..15ed5b13b 100644 --- a/test/test_installation.py +++ b/test/test_installation.py @@ -4,31 +4,19 @@ import ast import os import subprocess -import sys -from test.lib import TestBase -from test.lib.helper import with_rw_directory +from test.lib import TestBase, VirtualEnvironment, with_rw_directory class TestInstallation(TestBase): - def setUp_venv(self, rw_dir): - self.venv = rw_dir - subprocess.run([sys.executable, "-m", "venv", self.venv], stdout=subprocess.PIPE) - bin_name = "Scripts" if os.name == "nt" else "bin" - self.python = os.path.join(self.venv, bin_name, "python") - self.pip = os.path.join(self.venv, bin_name, "pip") - self.sources = os.path.join(self.venv, "src") - self.cwd = os.path.dirname(os.path.dirname(__file__)) - os.symlink(self.cwd, self.sources, target_is_directory=True) - @with_rw_directory def test_installation(self, rw_dir): - self.setUp_venv(rw_dir) + venv = self._set_up_venv(rw_dir) result = subprocess.run( - [self.pip, "install", "."], + [venv.pip, "install", "."], stdout=subprocess.PIPE, - cwd=self.sources, + cwd=venv.sources, ) self.assertEqual( 0, @@ -37,9 +25,9 @@ def test_installation(self, rw_dir): ) result = subprocess.run( - [self.python, "-c", "import git"], + [venv.python, "-c", "import git"], stdout=subprocess.PIPE, - cwd=self.sources, + cwd=venv.sources, ) self.assertEqual( 0, @@ -48,9 +36,9 @@ def test_installation(self, rw_dir): ) result = subprocess.run( - [self.python, "-c", "import gitdb; import smmap"], + [venv.python, "-c", "import gitdb; import smmap"], stdout=subprocess.PIPE, - cwd=self.sources, + cwd=venv.sources, ) self.assertEqual( 0, @@ -62,9 +50,9 @@ def test_installation(self, rw_dir): # by inserting its location into PYTHONPATH or otherwise patched into # sys.path, make sure it is not wrongly inserted as the *first* entry. result = subprocess.run( - [self.python, "-c", "import sys; import git; print(sys.path)"], + [venv.python, "-c", "import sys; import git; print(sys.path)"], stdout=subprocess.PIPE, - cwd=self.sources, + cwd=venv.sources, ) syspath = result.stdout.decode("utf-8").splitlines()[0] syspath = ast.literal_eval(syspath) @@ -73,3 +61,13 @@ def test_installation(self, rw_dir): syspath[0], msg="Failed to follow the conventions for https://docs.python.org/3/library/sys.html#sys.path", ) + + @staticmethod + def _set_up_venv(rw_dir): + venv = VirtualEnvironment(rw_dir, with_pip=True) + os.symlink( + os.path.dirname(os.path.dirname(__file__)), + venv.sources, + target_is_directory=True, + ) + return venv From a42ea0a38c489caf9969836141120d760d3754b4 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 18 Dec 2023 11:54:28 -0500 Subject: [PATCH 11/16] Cover absent/no-distro bash.exe in hooks "not from cwd" test See comments for details on the test's new implementation and what it achieves. --- .pre-commit-config.yaml | 2 +- test/fixtures/polyglot | 8 ++++++ test/test_index.py | 63 ++++++++++++++++++++++++++--------------- 3 files changed, 49 insertions(+), 24 deletions(-) create mode 100755 test/fixtures/polyglot diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index be97d5f9b..1ac5baa00 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: hooks: - id: shellcheck args: [--color] - exclude: ^git/ext/ + exclude: ^test/fixtures/polyglot$|^git/ext/ - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 diff --git a/test/fixtures/polyglot b/test/fixtures/polyglot new file mode 100755 index 000000000..f1dd56b26 --- /dev/null +++ b/test/fixtures/polyglot @@ -0,0 +1,8 @@ +#!/usr/bin/env sh +# Valid script in both Bash and Python, but with different behavior. +""":" +echo 'Ran intended hook.' >output.txt +exit +" """ +from pathlib import Path +Path('payload.txt').write_text('Ran impostor hook!', encoding='utf-8') diff --git a/test/test_index.py b/test/test_index.py index eb676add8..ff28f48ae 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -41,7 +41,14 @@ from git.objects import Blob from git.util import Actor, cwd, hex_to_bin, rmtree from gitdb.base import IStream -from test.lib import TestBase, fixture, fixture_path, with_rw_directory, with_rw_repo +from test.lib import ( + TestBase, + VirtualEnvironment, + fixture, + fixture_path, + with_rw_directory, + with_rw_repo, +) HOOKS_SHEBANG = "#!/usr/bin/env sh\n" @@ -1016,36 +1023,46 @@ def test_run_commit_hook(self, rw_repo): output = Path(rw_repo.git_dir, "output.txt").read_text(encoding="utf-8") self.assertEqual(output, "ran fake hook\n") - # FIXME: Figure out a way to make this test also work with Absent and WslNoDistro. - @pytest.mark.xfail( - type(_win_bash_status) is WinBashStatus.WslNoDistro, - reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", - raises=HookExecutionError, - ) @ddt.data((False,), (True,)) @with_rw_directory def test_hook_uses_shell_not_from_cwd(self, rw_dir, case): (chdir_to_repo,) = case + shell_name = "bash.exe" if os.name == "nt" else "sh" + maybe_chdir = cwd(rw_dir) if chdir_to_repo else contextlib.nullcontext() repo = Repo.init(rw_dir) - _make_hook(repo.git_dir, "fake-hook", "echo 'ran fake hook' >output.txt") - if os.name == "nt": - # Copy an actual binary that is not bash. - other_exe_path = Path(os.environ["SystemRoot"], "system32", "hostname.exe") - impostor_path = Path(rw_dir, "bash.exe") - shutil.copy(other_exe_path, impostor_path) + # We need an impostor shell that works on Windows and that can be distinguished + # from the real bash.exe. But even if the real bash.exe is absent or unusable, + # we should verify that the impostor is not run. So the impostor needs a clear + # side effect (unlike in TestGit.test_it_executes_git_not_from_cwd). Popen on + # Windows uses CreateProcessW, which disregards PATHEXT; the impostor may need + # to be a binary executable to ensure the vulnerability is found if present. No + # compiler need exist, shipping a binary in the test suite may target the wrong + # architecture, and generating one in a bespoke way may cause virus scanners to + # give a false positive. So we use a Bash/Python polyglot for the hook and use + # the Python interpreter itself as the bash.exe impostor. But an interpreter + # from a venv may not run outside of it, and a global interpreter won't run from + # a different location if it was installed from the Microsoft Store. So we make + # a new venv in rw_dir and use its interpreter. + venv = VirtualEnvironment(rw_dir, with_pip=False) + shutil.copy(venv.python, Path(rw_dir, shell_name)) + shutil.copy(fixture_path("polyglot"), hook_path("polyglot", repo.git_dir)) + payload = Path(rw_dir, "payload.txt") + + if type(_win_bash_status) in {WinBashStatus.Absent, WinBashStatus.WslNoDistro}: + # The real shell can't run, but the impostor should still not be used. + with self.assertRaises(HookExecutionError): + with maybe_chdir: + run_commit_hook("polyglot", repo.index) + self.assertFalse(payload.exists()) else: - # Create a shell script that doesn't do anything. - impostor_path = Path(rw_dir, "sh") - impostor_path.write_text("#!/bin/sh\n", encoding="utf-8") - os.chmod(impostor_path, 0o755) - - with cwd(rw_dir) if chdir_to_repo else contextlib.nullcontext(): - run_commit_hook("fake-hook", repo.index) - - output = Path(rw_dir, "output.txt").read_text(encoding="utf-8") - self.assertEqual(output, "ran fake hook\n") + # The real shell should run, and not the impostor. + with maybe_chdir: + run_commit_hook("polyglot", repo.index) + self.assertFalse(payload.exists()) + output = Path(rw_dir, "output.txt").read_text(encoding="utf-8") + self.assertEqual(output, "Ran intended hook.\n") @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, From f44524a9a9c8122b9b98d6e5797e1dfc3211c0b7 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 19 Dec 2023 08:11:04 +0800 Subject: [PATCH 12/16] Avoid spurious "location may have moved" on Windows The main case where this happens is when tempfile.gettempdir() has a component in it that uses an 8.3-encoded path, e.g., C:\Users\Administrator\... -> C:\Users\ADMINI~1\... This is a workaround for python/cpython#90329. I call realpath only once, when the venv is created, and not on any paths inside the venv, to make it less likely this masks the problems the warning is meant for. (For example, if Scripts, or python.exe in it, produced this even with the venv created as it is now, then that may indicte an actual problem.) Note that copying python.exe from Scripts to one level up in the venv, and changing its name to bash.exe to use it to simulate the bash.exe impostor, as is done in test_hook_uses_shell_not_from_cwd, should not (and does not) produce this warning. If that ever starts to do so, then that should be examined as a sign of brittleness. --- test/lib/helper.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/lib/helper.py b/test/lib/helper.py index 2fb8990dd..1fdc56fd1 100644 --- a/test/lib/helper.py +++ b/test/lib/helper.py @@ -400,8 +400,12 @@ class VirtualEnvironment: __slots__ = ("_env_dir",) def __init__(self, env_dir, *, with_pip): - self._env_dir = env_dir - venv.create(env_dir, symlinks=(os.name != "nt"), with_pip=with_pip) + if os.name == "nt": + self._env_dir = osp.realpath(env_dir) + venv.create(self.env_dir, symlinks=False, with_pip=with_pip) + else: + self._env_dir = env_dir + venv.create(self.env_dir, symlinks=True, with_pip=with_pip) @property def env_dir(self): From 15ebb258d4eebd9bf0f38780570d57e0b968b8de Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 18 Dec 2023 19:28:26 -0500 Subject: [PATCH 13/16] Clarify comment in test_hook_uses_shell_not_from_cwd --- test/test_index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_index.py b/test/test_index.py index ff28f48ae..d352faa6c 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -1042,9 +1042,9 @@ def test_hook_uses_shell_not_from_cwd(self, rw_dir, case): # architecture, and generating one in a bespoke way may cause virus scanners to # give a false positive. So we use a Bash/Python polyglot for the hook and use # the Python interpreter itself as the bash.exe impostor. But an interpreter - # from a venv may not run outside of it, and a global interpreter won't run from - # a different location if it was installed from the Microsoft Store. So we make - # a new venv in rw_dir and use its interpreter. + # from a venv may not run when copied outside of it, and a global interpreter + # won't run when copied to a different location if it was installed from the + # Microsoft Store. So we make a new venv in rw_dir and use its interpreter. venv = VirtualEnvironment(rw_dir, with_pip=False) shutil.copy(venv.python, Path(rw_dir, shell_name)) shutil.copy(fixture_path("polyglot"), hook_path("polyglot", repo.git_dir)) From c551e916c7b9e2d623b9d76f3352849a707d9bbe Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 20 Dec 2023 00:40:43 -0500 Subject: [PATCH 14/16] Extract shared logic for using Popen safely on Windows This creates git.util.safer_popen that, on non-Windows systems, is bound to subprocess.Popen (to avoid introducing unnecessary latency). On Windows, it is a function that wraps subprocess.Popen, consolidating two pieces of logic that had previously been duplicated: 1. Temporarily setting NoDefaultCurrentDirectoryInExePath in the calling environment and, when shell=True is used, setting it in the subprocess environment as well. This prevents executables specified as single names (which are mainly "git" and, for hooks, "bash.exe") from being searched for in the current working directory of GitPython or, when a shell is used, the current working directory of the shell used to run them. 2. Passing the CREATE_NO_WINDOW and CREATE_NEW_PROCESS_GROUP flags as creationflags. This is not a security measure. It is indirectly related to safety in that CREATE_NO_WINDOW eliminated at least some, and possibly all, cases where calling Git.execute (directly, or indirectly via a dynamic method) with shell=True conferred an advantage over the inherently more secure default of shell=False; and CREATE_NEW_PROCESS facilitates some ways of terminating subprocesses that would otherwise be unavailable, thereby making resource exhaustion less likely. But really the reason I included creationflags here is that it seems it should always be used in the same situations as preventing the current directory from being searched (and always was), and including it further reduces code duplication and simplifies calling code. This commit does not improve security or robustness, because these features were already present. Instead, this moves them to a single location. It also documents them by giving the function bound to safer_popen on Windows, _safer_popen_windows, a detailed docstring. Because there would otherwise be potential for confusion on the different ways to perform or customize path searches, I have also added a doctring to py_where noting its limited use case and its relationship to shutil.which and non-shell search. (The search in _safer_popen_windows is typically a non-shell search, which is why it cannot be reimplemented to do its own lookup by calling an only slightly modified version of shutil.which, without a risk of breaking some currently working uses. It may, however, be possible to fix the race condition by doing something analogous for Windows non-shell search behavior, which is largely but not entirely described in the documentation for CreateProcessW.) --- git/cmd.py | 48 ++++++++++--------------------- git/index/fun.py | 25 ++++++----------- git/util.py | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ test/test_git.py | 19 ++++++------- 4 files changed, 106 insertions(+), 59 deletions(-) diff --git a/git/cmd.py b/git/cmd.py index 3d26dbad2..c4dea0393 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -29,8 +29,8 @@ cygpath, expand_path, is_cygwin_git, - patch_env, remove_password_if_present, + safer_popen, stream_copy, ) @@ -225,14 +225,6 @@ def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], exc ## -- End Utilities -- @} -if os.name == "nt": - # CREATE_NEW_PROCESS_GROUP is needed to allow killing it afterwards. See: - # https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal - PROC_CREATIONFLAGS = subprocess.CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP -else: - PROC_CREATIONFLAGS = 0 - - class Git(LazyMixin): """The Git class manages communication with the Git binary. @@ -985,9 +977,6 @@ def execute( if inline_env is not None: env.update(inline_env) - if shell is None: - shell = self.USE_SHELL - if os.name == "nt": cmd_not_found_exception = OSError if kill_after_timeout is not None: @@ -995,18 +984,13 @@ def execute( redacted_command, '"kill_after_timeout" feature is not supported on Windows.', ) - # Search PATH but not CWD. The "1" can be any value. We'll patch just before - # the Popen call and unpatch just after, or we get a worse race condition. - maybe_patch_caller_env = patch_env("NoDefaultCurrentDirectoryInExePath", "1") - if shell: - # Modify the direct shell subprocess's own search behavior accordingly. - env["NoDefaultCurrentDirectoryInExePath"] = "1" else: cmd_not_found_exception = FileNotFoundError - maybe_patch_caller_env = contextlib.nullcontext() # END handle stdout_sink = PIPE if with_stdout else getattr(subprocess, "DEVNULL", None) or open(os.devnull, "wb") + if shell is None: + shell = self.USE_SHELL log.debug( "Popen(%s, cwd=%s, stdin=%s, shell=%s, universal_newlines=%s)", redacted_command, @@ -1016,20 +1000,18 @@ def execute( universal_newlines, ) try: - with maybe_patch_caller_env: - proc = Popen( - command, - env=env, - cwd=cwd, - bufsize=-1, - stdin=(istream or DEVNULL), - stderr=PIPE, - stdout=stdout_sink, - shell=shell, - universal_newlines=universal_newlines, - creationflags=PROC_CREATIONFLAGS, - **subprocess_kwargs, - ) + proc = safer_popen( + command, + env=env, + cwd=cwd, + bufsize=-1, + stdin=(istream or DEVNULL), + stderr=PIPE, + stdout=stdout_sink, + shell=shell, + universal_newlines=universal_newlines, + **subprocess_kwargs, + ) except cmd_not_found_exception as err: raise GitCommandNotFound(redacted_command, err) from err else: diff --git a/git/index/fun.py b/git/index/fun.py index 303141bbe..500fb251d 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -3,7 +3,6 @@ """Standalone functions to accompany the index implementation and make it more versatile.""" -import contextlib from io import BytesIO import os import os.path as osp @@ -19,7 +18,7 @@ ) import subprocess -from git.cmd import PROC_CREATIONFLAGS, handle_process_output +from git.cmd import handle_process_output from git.compat import defenc, force_bytes, force_text, safe_decode from git.exc import HookExecutionError, UnmergedEntriesError from git.objects.fun import ( @@ -27,7 +26,7 @@ traverse_trees_recursive, tree_to_stream, ) -from git.util import IndexFileSHA1Writer, finalize_process, patch_env +from git.util import IndexFileSHA1Writer, finalize_process, safer_popen from gitdb.base import IStream from gitdb.typ import str_tree_type @@ -91,10 +90,6 @@ def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None: env = os.environ.copy() env["GIT_INDEX_FILE"] = safe_decode(str(index.path)) env["GIT_EDITOR"] = ":" - if os.name == "nt": - maybe_patch_caller_env = patch_env("NoDefaultCurrentDirectoryInExePath", "1") - else: - maybe_patch_caller_env = contextlib.nullcontext() cmd = [hp] try: if os.name == "nt" and not _has_file_extension(hp): @@ -103,15 +98,13 @@ def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None: relative_hp = Path(hp).relative_to(index.repo.working_dir).as_posix() cmd = ["bash.exe", relative_hp] - with maybe_patch_caller_env: - process = subprocess.Popen( - cmd + list(args), - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - cwd=index.repo.working_dir, - creationflags=PROC_CREATIONFLAGS, - ) + process = safer_popen( + cmd + list(args), + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + cwd=index.repo.working_dir, + ) except Exception as ex: raise HookExecutionError(hp, ex) from ex else: diff --git a/git/util.py b/git/util.py index c5afea241..5f5fb8566 100644 --- a/git/util.py +++ b/git/util.py @@ -33,6 +33,7 @@ IO, Iterator, List, + Mapping, Optional, Pattern, Sequence, @@ -327,6 +328,17 @@ def _get_exe_extensions() -> Sequence[str]: def py_where(program: str, path: Optional[PathLike] = None) -> List[str]: + """Perform a path search to assist :func:`is_cygwin_git`. + + This is not robust for general use. It is an implementation detail of + :func:`is_cygwin_git`. When a search following all shell rules is needed, + :func:`shutil.which` can be used instead. + + :note: Neither this function nor :func:`shutil.which` will predict the effect of an + executable search on a native Windows system due to a :class:`subprocess.Popen` + call without ``shell=True``, because shell and non-shell executable search on + Windows differ considerably. + """ # From: http://stackoverflow.com/a/377028/548792 winprog_exts = _get_exe_extensions() @@ -524,6 +536,67 @@ def remove_password_if_present(cmdline: Sequence[str]) -> List[str]: return new_cmdline +def _safer_popen_windows( + command: Union[str, Sequence[Any]], + *, + shell: bool = False, + env: Optional[Mapping[str, str]] = None, + **kwargs: Any, +) -> subprocess.Popen: + """Call :class:`subprocess.Popen` on Windows but don't include a CWD in the search. + + This avoids an untrusted search path condition where a file like ``git.exe`` in a + malicious repository would be run when GitPython operates on the repository. The + process using GitPython may have an untrusted repository's working tree as its + current working directory. Some operations may temporarily change to that directory + before running a subprocess. In addition, while by default GitPython does not run + external commands with a shell, it can be made to do so, in which case the CWD of + the subprocess, which GitPython usually sets to a repository working tree, can + itself be searched automatically by the shell. This wrapper covers all those cases. + + :note: This currently works by setting the ``NoDefaultCurrentDirectoryInExePath`` + environment variable during subprocess creation. It also takes care of passing + Windows-specific process creation flags, but that is unrelated to path search. + + :note: The current implementation contains a race condition on :attr:`os.environ`. + GitPython isn't thread-safe, but a program using it on one thread should ideally + be able to mutate :attr:`os.environ` on another, without unpredictable results. + See comments in https://github.com/gitpython-developers/GitPython/pull/1650. + """ + # CREATE_NEW_PROCESS_GROUP is needed for some ways of killing it afterwards. See: + # https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal + # https://docs.python.org/3/library/subprocess.html#subprocess.CREATE_NEW_PROCESS_GROUP + creationflags = subprocess.CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP + + # When using a shell, the shell is the direct subprocess, so the variable must be + # set in its environment, to affect its search behavior. (The "1" can be any value.) + if shell: + safer_env = {} if env is None else dict(env) + safer_env["NoDefaultCurrentDirectoryInExePath"] = "1" + else: + safer_env = env + + # When not using a shell, the current process does the search in a CreateProcessW + # API call, so the variable must be set in our environment. With a shell, this is + # unnecessary, in versions where https://github.com/python/cpython/issues/101283 is + # patched. If not, in the rare case the ComSpec environment variable is unset, the + # shell is searched for unsafely. Setting NoDefaultCurrentDirectoryInExePath in all + # cases, as here, is simpler and protects against that. (The "1" can be any value.) + with patch_env("NoDefaultCurrentDirectoryInExePath", "1"): + return subprocess.Popen( + command, + shell=shell, + env=safer_env, + creationflags=creationflags, + **kwargs, + ) + + +if os.name == "nt": + safer_popen = _safer_popen_windows +else: + safer_popen = subprocess.Popen + # } END utilities # { Classes diff --git a/test/test_git.py b/test/test_git.py index e80ad37ef..39e19bb8a 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -25,7 +25,7 @@ import ddt from git import Git, refresh, GitCommandError, GitCommandNotFound, Repo, cmd -from git.util import cwd, finalize_process +from git.util import cwd, finalize_process, safer_popen from test.lib import TestBase, fixture_path, with_rw_directory @@ -115,28 +115,28 @@ def test_it_transforms_kwargs_into_git_command_arguments(self): def _do_shell_combo(self, value_in_call, value_from_class): with mock.patch.object(Git, "USE_SHELL", value_from_class): # git.cmd gets Popen via a "from" import, so patch it there. - with mock.patch.object(cmd, "Popen", wraps=cmd.Popen) as mock_popen: + with mock.patch.object(cmd, "safer_popen", wraps=cmd.safer_popen) as mock_safer_popen: # Use a command with no arguments (besides the program name), so it runs # with or without a shell, on all OSes, with the same effect. self.git.execute(["git"], with_exceptions=False, shell=value_in_call) - return mock_popen + return mock_safer_popen @ddt.idata(_shell_cases) def test_it_uses_shell_or_not_as_specified(self, case): """A bool passed as ``shell=`` takes precedence over `Git.USE_SHELL`.""" value_in_call, value_from_class, expected_popen_arg = case - mock_popen = self._do_shell_combo(value_in_call, value_from_class) - mock_popen.assert_called_once() - self.assertIs(mock_popen.call_args.kwargs["shell"], expected_popen_arg) + mock_safer_popen = self._do_shell_combo(value_in_call, value_from_class) + mock_safer_popen.assert_called_once() + self.assertIs(mock_safer_popen.call_args.kwargs["shell"], expected_popen_arg) @ddt.idata(full_case[:2] for full_case in _shell_cases) def test_it_logs_if_it_uses_a_shell(self, case): """``shell=`` in the log message agrees with what is passed to `Popen`.""" value_in_call, value_from_class = case with self.assertLogs(cmd.log, level=logging.DEBUG) as log_watcher: - mock_popen = self._do_shell_combo(value_in_call, value_from_class) - self._assert_logged_for_popen(log_watcher, "shell", mock_popen.call_args.kwargs["shell"]) + mock_safer_popen = self._do_shell_combo(value_in_call, value_from_class) + self._assert_logged_for_popen(log_watcher, "shell", mock_safer_popen.call_args.kwargs["shell"]) @ddt.data( ("None", None), @@ -405,13 +405,12 @@ def counter_stderr(line): fixture_path("cat_file.py"), str(fixture_path("issue-301_stderr")), ] - proc = subprocess.Popen( + proc = safer_popen( cmdline, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False, - creationflags=cmd.PROC_CREATIONFLAGS, ) handle_process_output(proc, counter_stdout, counter_stderr, finalize_process) From 3eb7c2ab82e6dbe101ff916fca29d539cc2793af Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Thu, 21 Dec 2023 03:48:53 -0500 Subject: [PATCH 15/16] Move safer_popen from git.util to git.cmd I had originally written it in git.util because it is used from two separate modules (git.cmd and git.index.fun) and is arguably the same sort of thing as remove_password_if_present, in that both relate to running processes (and to security) and both are used from multiple modules yet are not meant for use outside GitPython. However, all of this is also true of git.cmd.handle_process_output, which this really has more in common with: it's a utility related *only* to the use of subprocesses, while remove_password_if_present can be used for other sanitization. In addition, it also replaces git.cmd.PROC_CREATIONFLAGS (also imported in git.index.fun) by passing that to Popen on Windows (since the situations where a custom value of creationflags should be used are the same as those where safer_popen should be called for its primary benefit of avoiding an untrusted path search when creating the subprocess). safer_popen and its Windows implementation _safer_popen_windows are thus moved from git/util.py to git/cmd.py, with related changes, such as to imports, done everywhere they are needed. --- git/cmd.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++-- git/index/fun.py | 4 +-- git/util.py | 62 -------------------------------------------- test/test_git.py | 5 ++-- 4 files changed, 69 insertions(+), 69 deletions(-) diff --git a/git/cmd.py b/git/cmd.py index c4dea0393..4413182e0 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -29,8 +29,8 @@ cygpath, expand_path, is_cygwin_git, + patch_env, remove_password_if_present, - safer_popen, stream_copy, ) @@ -46,6 +46,7 @@ Iterator, List, Mapping, + Optional, Sequence, TYPE_CHECKING, TextIO, @@ -102,7 +103,7 @@ def handle_process_output( Callable[[bytes, "Repo", "DiffIndex"], None], ], stderr_handler: Union[None, Callable[[AnyStr], None], Callable[[List[AnyStr]], None]], - finalizer: Union[None, Callable[[Union[subprocess.Popen, "Git.AutoInterrupt"]], None]] = None, + finalizer: Union[None, Callable[[Union[Popen, "Git.AutoInterrupt"]], None]] = None, decode_streams: bool = True, kill_after_timeout: Union[None, float] = None, ) -> None: @@ -207,6 +208,68 @@ def pump_stream( finalizer(process) +def _safer_popen_windows( + command: Union[str, Sequence[Any]], + *, + shell: bool = False, + env: Optional[Mapping[str, str]] = None, + **kwargs: Any, +) -> Popen: + """Call :class:`subprocess.Popen` on Windows but don't include a CWD in the search. + + This avoids an untrusted search path condition where a file like ``git.exe`` in a + malicious repository would be run when GitPython operates on the repository. The + process using GitPython may have an untrusted repository's working tree as its + current working directory. Some operations may temporarily change to that directory + before running a subprocess. In addition, while by default GitPython does not run + external commands with a shell, it can be made to do so, in which case the CWD of + the subprocess, which GitPython usually sets to a repository working tree, can + itself be searched automatically by the shell. This wrapper covers all those cases. + + :note: This currently works by setting the ``NoDefaultCurrentDirectoryInExePath`` + environment variable during subprocess creation. It also takes care of passing + Windows-specific process creation flags, but that is unrelated to path search. + + :note: The current implementation contains a race condition on :attr:`os.environ`. + GitPython isn't thread-safe, but a program using it on one thread should ideally + be able to mutate :attr:`os.environ` on another, without unpredictable results. + See comments in https://github.com/gitpython-developers/GitPython/pull/1650. + """ + # CREATE_NEW_PROCESS_GROUP is needed for some ways of killing it afterwards. See: + # https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal + # https://docs.python.org/3/library/subprocess.html#subprocess.CREATE_NEW_PROCESS_GROUP + creationflags = subprocess.CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP + + # When using a shell, the shell is the direct subprocess, so the variable must be + # set in its environment, to affect its search behavior. (The "1" can be any value.) + if shell: + safer_env = {} if env is None else dict(env) + safer_env["NoDefaultCurrentDirectoryInExePath"] = "1" + else: + safer_env = env + + # When not using a shell, the current process does the search in a CreateProcessW + # API call, so the variable must be set in our environment. With a shell, this is + # unnecessary, in versions where https://github.com/python/cpython/issues/101283 is + # patched. If not, in the rare case the ComSpec environment variable is unset, the + # shell is searched for unsafely. Setting NoDefaultCurrentDirectoryInExePath in all + # cases, as here, is simpler and protects against that. (The "1" can be any value.) + with patch_env("NoDefaultCurrentDirectoryInExePath", "1"): + return Popen( + command, + shell=shell, + env=safer_env, + creationflags=creationflags, + **kwargs, + ) + + +if os.name == "nt": + safer_popen = _safer_popen_windows +else: + safer_popen = Popen + + def dashify(string: str) -> str: return string.replace("_", "-") diff --git a/git/index/fun.py b/git/index/fun.py index 500fb251d..580493f6d 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -18,7 +18,7 @@ ) import subprocess -from git.cmd import handle_process_output +from git.cmd import handle_process_output, safer_popen from git.compat import defenc, force_bytes, force_text, safe_decode from git.exc import HookExecutionError, UnmergedEntriesError from git.objects.fun import ( @@ -26,7 +26,7 @@ traverse_trees_recursive, tree_to_stream, ) -from git.util import IndexFileSHA1Writer, finalize_process, safer_popen +from git.util import IndexFileSHA1Writer, finalize_process from gitdb.base import IStream from gitdb.typ import str_tree_type diff --git a/git/util.py b/git/util.py index 5f5fb8566..5ccd2b04c 100644 --- a/git/util.py +++ b/git/util.py @@ -33,7 +33,6 @@ IO, Iterator, List, - Mapping, Optional, Pattern, Sequence, @@ -536,67 +535,6 @@ def remove_password_if_present(cmdline: Sequence[str]) -> List[str]: return new_cmdline -def _safer_popen_windows( - command: Union[str, Sequence[Any]], - *, - shell: bool = False, - env: Optional[Mapping[str, str]] = None, - **kwargs: Any, -) -> subprocess.Popen: - """Call :class:`subprocess.Popen` on Windows but don't include a CWD in the search. - - This avoids an untrusted search path condition where a file like ``git.exe`` in a - malicious repository would be run when GitPython operates on the repository. The - process using GitPython may have an untrusted repository's working tree as its - current working directory. Some operations may temporarily change to that directory - before running a subprocess. In addition, while by default GitPython does not run - external commands with a shell, it can be made to do so, in which case the CWD of - the subprocess, which GitPython usually sets to a repository working tree, can - itself be searched automatically by the shell. This wrapper covers all those cases. - - :note: This currently works by setting the ``NoDefaultCurrentDirectoryInExePath`` - environment variable during subprocess creation. It also takes care of passing - Windows-specific process creation flags, but that is unrelated to path search. - - :note: The current implementation contains a race condition on :attr:`os.environ`. - GitPython isn't thread-safe, but a program using it on one thread should ideally - be able to mutate :attr:`os.environ` on another, without unpredictable results. - See comments in https://github.com/gitpython-developers/GitPython/pull/1650. - """ - # CREATE_NEW_PROCESS_GROUP is needed for some ways of killing it afterwards. See: - # https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal - # https://docs.python.org/3/library/subprocess.html#subprocess.CREATE_NEW_PROCESS_GROUP - creationflags = subprocess.CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP - - # When using a shell, the shell is the direct subprocess, so the variable must be - # set in its environment, to affect its search behavior. (The "1" can be any value.) - if shell: - safer_env = {} if env is None else dict(env) - safer_env["NoDefaultCurrentDirectoryInExePath"] = "1" - else: - safer_env = env - - # When not using a shell, the current process does the search in a CreateProcessW - # API call, so the variable must be set in our environment. With a shell, this is - # unnecessary, in versions where https://github.com/python/cpython/issues/101283 is - # patched. If not, in the rare case the ComSpec environment variable is unset, the - # shell is searched for unsafely. Setting NoDefaultCurrentDirectoryInExePath in all - # cases, as here, is simpler and protects against that. (The "1" can be any value.) - with patch_env("NoDefaultCurrentDirectoryInExePath", "1"): - return subprocess.Popen( - command, - shell=shell, - env=safer_env, - creationflags=creationflags, - **kwargs, - ) - - -if os.name == "nt": - safer_popen = _safer_popen_windows -else: - safer_popen = subprocess.Popen - # } END utilities # { Classes diff --git a/test/test_git.py b/test/test_git.py index 39e19bb8a..3b9abc712 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -25,7 +25,7 @@ import ddt from git import Git, refresh, GitCommandError, GitCommandNotFound, Repo, cmd -from git.util import cwd, finalize_process, safer_popen +from git.util import cwd, finalize_process from test.lib import TestBase, fixture_path, with_rw_directory @@ -114,7 +114,6 @@ def test_it_transforms_kwargs_into_git_command_arguments(self): def _do_shell_combo(self, value_in_call, value_from_class): with mock.patch.object(Git, "USE_SHELL", value_from_class): - # git.cmd gets Popen via a "from" import, so patch it there. with mock.patch.object(cmd, "safer_popen", wraps=cmd.safer_popen) as mock_safer_popen: # Use a command with no arguments (besides the program name), so it runs # with or without a shell, on all OSes, with the same effect. @@ -389,7 +388,7 @@ def test_environment(self, rw_dir): self.assertIn("FOO", str(err)) def test_handle_process_output(self): - from git.cmd import handle_process_output + from git.cmd import handle_process_output, safer_popen line_count = 5002 count = [None, 0, 0] From 1f3caa31f1b63908235e341418a0804ed37a320a Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 25 Dec 2023 11:33:40 -0500 Subject: [PATCH 16/16] Further clarify comment in test_hook_uses_shell_not_from_cwd --- test/test_index.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/test_index.py b/test/test_index.py index d352faa6c..8a64e2293 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -1032,16 +1032,16 @@ def test_hook_uses_shell_not_from_cwd(self, rw_dir, case): maybe_chdir = cwd(rw_dir) if chdir_to_repo else contextlib.nullcontext() repo = Repo.init(rw_dir) - # We need an impostor shell that works on Windows and that can be distinguished - # from the real bash.exe. But even if the real bash.exe is absent or unusable, - # we should verify that the impostor is not run. So the impostor needs a clear - # side effect (unlike in TestGit.test_it_executes_git_not_from_cwd). Popen on - # Windows uses CreateProcessW, which disregards PATHEXT; the impostor may need - # to be a binary executable to ensure the vulnerability is found if present. No - # compiler need exist, shipping a binary in the test suite may target the wrong - # architecture, and generating one in a bespoke way may cause virus scanners to - # give a false positive. So we use a Bash/Python polyglot for the hook and use - # the Python interpreter itself as the bash.exe impostor. But an interpreter + # We need an impostor shell that works on Windows and that the test can + # distinguish from the real bash.exe. But even if the real bash.exe is absent or + # unusable, we should verify the impostor is not run. So the impostor needs a + # clear side effect (unlike in TestGit.test_it_executes_git_not_from_cwd). Popen + # on Windows uses CreateProcessW, which disregards PATHEXT; the impostor may + # need to be a binary executable to ensure the vulnerability is found if + # present. No compiler need exist, shipping a binary in the test suite may + # target the wrong architecture, and generating one in a bespoke way may trigger + # false positive virus scans. So we use a Bash/Python polyglot for the hook and + # use the Python interpreter itself as the bash.exe impostor. But an interpreter # from a venv may not run when copied outside of it, and a global interpreter # won't run when copied to a different location if it was installed from the # Microsoft Store. So we make a new venv in rw_dir and use its interpreter.