Skip to content

Commit 91a7398

Browse files
committed
CM-55207: improve testing and cover some edge cases in commit pre-receive
1 parent c223e04 commit 91a7398

File tree

3 files changed

+206
-8
lines changed

3 files changed

+206
-8
lines changed

cycode/cli/apps/scan/pre_receive/pre_receive_command.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ def pre_receive_command(
4747
timeout = configuration_manager.get_pre_receive_command_timeout(command_scan_type)
4848
with TimeoutAfter(timeout):
4949
branch_update_details = parse_pre_receive_input()
50-
commit_range = calculate_pre_receive_commit_range(branch_update_details)
50+
commit_range = calculate_pre_receive_commit_range(
51+
repo_path=os.getcwd(), branch_update_details=branch_update_details
52+
)
5153
if not commit_range:
5254
logger.info(
5355
'No new commits found for pushed branch, %s',

cycode/cli/files_collector/commit_range_documents.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,19 +104,27 @@ def collect_commit_range_diff_documents(
104104
return commit_documents_to_scan
105105

106106

107-
def calculate_pre_receive_commit_range(branch_update_details: str) -> Optional[str]:
107+
def calculate_pre_receive_commit_range(repo_path: str, branch_update_details: str) -> Optional[str]:
108108
end_commit = _get_end_commit_from_branch_update_details(branch_update_details)
109109

110110
# branch is deleted, no need to perform scan
111111
if end_commit == consts.EMPTY_COMMIT_SHA:
112112
return None
113113

114-
start_commit = _get_oldest_unupdated_commit_for_branch(end_commit)
114+
repo = git_proxy.get_repo(repo_path)
115+
start_commit = _get_oldest_unupdated_commit_for_branch(repo, end_commit)
115116

116117
# no new commit to update found
117118
if not start_commit:
118119
return None
119120

121+
# If the oldest not-yet-updated commit has no parent (root commit or orphaned history),
122+
# using '~1' will fail. In that case, scan from the end commit, which effectively
123+
# includes the entire history reachable from it (which is exactly what we need here).
124+
125+
if not bool(repo.commit(start_commit).parents):
126+
return f'{end_commit}'
127+
120128
return f'{start_commit}~1...{end_commit}'
121129

122130

@@ -126,10 +134,10 @@ def _get_end_commit_from_branch_update_details(update_details: str) -> str:
126134
return end_commit
127135

128136

129-
def _get_oldest_unupdated_commit_for_branch(commit: str) -> Optional[str]:
137+
def _get_oldest_unupdated_commit_for_branch(repo: 'Repo', commit: str) -> Optional[str]:
130138
# get a list of commits by chronological order that are not in the remote repository yet
131139
# more info about rev-list command: https://git-scm.com/docs/git-rev-list
132-
repo = git_proxy.get_repo(os.getcwd())
140+
133141
not_updated_commits = repo.git.rev_list(commit, '--topo-order', '--reverse', '--not', '--all')
134142

135143
commits = not_updated_commits.splitlines()
@@ -199,8 +207,7 @@ def parse_pre_receive_input() -> str:
199207
200208
:return: First branch update details (input's first line)
201209
"""
202-
# FIXME(MarshalX): this blocks main thread forever if called outside of pre-receive hook
203-
pre_receive_input = sys.stdin.read().strip()
210+
pre_receive_input = _read_hook_input_from_stdin()
204211
if not pre_receive_input:
205212
raise ValueError(
206213
'Pre receive input was not found. Make sure that you are using this command only in pre-receive hook'
@@ -222,7 +229,7 @@ def parse_pre_push_input() -> str:
222229
223230
:return: First, push update details (input's first line)
224231
""" # noqa: E501
225-
pre_push_input = sys.stdin.read().strip()
232+
pre_push_input = _read_hook_input_from_stdin()
226233
if not pre_push_input:
227234
raise ValueError(
228235
'Pre push input was not found. Make sure that you are using this command only in pre-push hook'
@@ -232,6 +239,19 @@ def parse_pre_push_input() -> str:
232239
return pre_push_input.splitlines()[0]
233240

234241

242+
def _read_hook_input_from_stdin() -> str:
243+
"""Read input from stdin when called from a hook.
244+
245+
If called manually from the command line, return an empty string so it doesn't block the main thread.
246+
247+
Returns:
248+
Input from stdin
249+
"""
250+
if sys.stdin.isatty():
251+
return ''
252+
return sys.stdin.read().strip()
253+
254+
235255
def _get_default_branches_for_merge_base(repo: 'Repo') -> list[str]:
236256
"""Get a list of default branches to try for merge base calculation.
237257

tests/cli/files_collector/test_commit_range_documents.py

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,22 @@
1212
from cycode.cli.files_collector.commit_range_documents import (
1313
_get_default_branches_for_merge_base,
1414
calculate_pre_push_commit_range,
15+
calculate_pre_receive_commit_range,
1516
get_diff_file_path,
1617
get_safe_head_reference_for_diff,
1718
parse_commit_range,
1819
parse_pre_push_input,
20+
parse_pre_receive_input,
1921
)
2022
from cycode.cli.utils.path_utils import get_path_by_os
2123

24+
DUMMY_SHA_0 = '0' * 40
25+
DUMMY_SHA_1 = '1' * 40
26+
DUMMY_SHA_2 = '2' * 40
27+
DUMMY_SHA_A = 'a' * 40
28+
DUMMY_SHA_B = 'b' * 40
29+
DUMMY_SHA_C = 'c' * 40
30+
2231

2332
@contextmanager
2433
def git_repository(path: str) -> Generator[Repo, None, None]:
@@ -871,3 +880,170 @@ def test_single_commit_spec(self) -> None:
871880

872881
parsed_from, parsed_to = parse_commit_range(a, temp_dir)
873882
assert (parsed_from, parsed_to) == (a, c)
883+
884+
885+
class TestParsePreReceiveInput:
886+
"""Test the parse_pre_receive_input function with various pre-receive hook input scenarios."""
887+
888+
def test_parse_single_update_input(self) -> None:
889+
"""Test parsing a single branch update input."""
890+
pre_receive_input = f'{DUMMY_SHA_1} {DUMMY_SHA_2} refs/heads/main'
891+
892+
with patch('sys.stdin', StringIO(pre_receive_input)):
893+
result = parse_pre_receive_input()
894+
assert result == pre_receive_input
895+
896+
def test_parse_multiple_update_input_returns_first_line(self) -> None:
897+
"""Test parsing multiple branch updates returns only the first line."""
898+
pre_receive_input = f"""{DUMMY_SHA_0} {DUMMY_SHA_A} refs/heads/main
899+
{DUMMY_SHA_B} {DUMMY_SHA_C} refs/heads/feature"""
900+
901+
with patch('sys.stdin', StringIO(pre_receive_input)):
902+
result = parse_pre_receive_input()
903+
assert result == f'{DUMMY_SHA_0} {DUMMY_SHA_A} refs/heads/main'
904+
905+
def test_parse_empty_input_raises_error(self) -> None:
906+
"""Test that empty input raises ValueError."""
907+
match = 'Pre receive input was not found'
908+
with patch('sys.stdin', StringIO('')), pytest.raises(ValueError, match=match):
909+
parse_pre_receive_input()
910+
911+
912+
class TestCalculatePreReceiveCommitRange:
913+
"""Test the calculate_pre_receive_commit_range function with representative scenarios."""
914+
915+
def test_branch_deletion_returns_none(self) -> None:
916+
"""When end commit is all zeros (deletion), no scan is needed."""
917+
update_details = f'{DUMMY_SHA_A} {consts.EMPTY_COMMIT_SHA} refs/heads/feature'
918+
assert calculate_pre_receive_commit_range(os.getcwd(), update_details) is None
919+
920+
def test_no_new_commits_returns_none(self) -> None:
921+
"""When there are no commits not in remote, return None."""
922+
with tempfile.TemporaryDirectory() as server_dir:
923+
server_repo = Repo.init(server_dir, bare=True)
924+
try:
925+
with tempfile.TemporaryDirectory() as work_dir:
926+
work_repo = Repo.init(work_dir, b='main')
927+
try:
928+
# Create a single commit and push it to the server as main (end commit is already on a ref)
929+
test_file = os.path.join(work_dir, 'file.txt')
930+
with open(test_file, 'w') as f:
931+
f.write('base')
932+
work_repo.index.add(['file.txt'])
933+
end_commit = work_repo.index.commit('initial')
934+
935+
work_repo.create_remote('origin', server_dir)
936+
work_repo.remotes.origin.push('main:main')
937+
938+
update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main'
939+
assert calculate_pre_receive_commit_range(server_dir, update_details) is None
940+
finally:
941+
work_repo.close()
942+
finally:
943+
server_repo.close()
944+
945+
def test_returns_triple_dot_range_from_oldest_unupdated(self) -> None:
946+
"""Returns '<oldest>~1...<end>' when there are new commits to scan."""
947+
with tempfile.TemporaryDirectory() as server_dir:
948+
server_repo = Repo.init(server_dir, bare=True)
949+
try:
950+
with tempfile.TemporaryDirectory() as work_dir:
951+
work_repo = Repo.init(work_dir, b='main')
952+
try:
953+
# Create commit A and push it to server as main (server has A on a ref)
954+
a_path = os.path.join(work_dir, 'a.txt')
955+
with open(a_path, 'w') as f:
956+
f.write('A')
957+
work_repo.index.add(['a.txt'])
958+
work_repo.index.commit('A')
959+
960+
work_repo.create_remote('origin', server_dir)
961+
work_repo.remotes.origin.push('main:main')
962+
963+
# Create commits B and C locally (not yet on server ref)
964+
b_path = os.path.join(work_dir, 'b.txt')
965+
with open(b_path, 'w') as f:
966+
f.write('B')
967+
work_repo.index.add(['b.txt'])
968+
b_commit = work_repo.index.commit('B')
969+
970+
c_path = os.path.join(work_dir, 'c.txt')
971+
with open(c_path, 'w') as f:
972+
f.write('C')
973+
work_repo.index.add(['c.txt'])
974+
end_commit = work_repo.index.commit('C')
975+
976+
# Push the objects to a temporary ref and then delete that ref on server,
977+
# so the objects exist but are not reachable from any ref.
978+
work_repo.remotes.origin.push(f'{end_commit.hexsha}:refs/tmp/hold')
979+
Repo(server_dir).git.update_ref('-d', 'refs/tmp/hold')
980+
981+
update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main'
982+
result = calculate_pre_receive_commit_range(server_dir, update_details)
983+
assert result == f'{b_commit.hexsha}~1...{end_commit.hexsha}'
984+
finally:
985+
work_repo.close()
986+
finally:
987+
server_repo.close()
988+
989+
def test_initial_oldest_commit_without_parent_returns_single_commit_range(self) -> None:
990+
"""If oldest commit has no parent, avoid '~1' and scan from end commit only."""
991+
with tempfile.TemporaryDirectory() as server_dir:
992+
server_repo = Repo.init(server_dir, bare=True)
993+
try:
994+
with tempfile.TemporaryDirectory() as work_dir:
995+
work_repo = Repo.init(work_dir, b='main')
996+
try:
997+
# Create a single root commit locally
998+
p = os.path.join(work_dir, 'root.txt')
999+
with open(p, 'w') as f:
1000+
f.write('root')
1001+
work_repo.index.add(['root.txt'])
1002+
end_commit = work_repo.index.commit('root')
1003+
1004+
work_repo.create_remote('origin', server_dir)
1005+
# Push objects to a temporary ref and delete it so server has objects but no refs
1006+
work_repo.remotes.origin.push(f'{end_commit.hexsha}:refs/tmp/hold')
1007+
Repo(server_dir).git.update_ref('-d', 'refs/tmp/hold')
1008+
1009+
update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main'
1010+
result = calculate_pre_receive_commit_range(server_dir, update_details)
1011+
assert result == end_commit.hexsha
1012+
finally:
1013+
work_repo.close()
1014+
finally:
1015+
server_repo.close()
1016+
1017+
def test_initial_oldest_commit_without_parent_with_two_commits_returns_single_commit_range(self) -> None:
1018+
"""If there are two new commits and the oldest has no parent, avoid '~1' and scan from end commit only."""
1019+
with tempfile.TemporaryDirectory() as server_dir:
1020+
server_repo = Repo.init(server_dir, bare=True)
1021+
try:
1022+
with tempfile.TemporaryDirectory() as work_dir:
1023+
work_repo = Repo.init(work_dir, b='main')
1024+
try:
1025+
# Create two commits locally: oldest has no parent, second on top
1026+
a_path = os.path.join(work_dir, 'a.txt')
1027+
with open(a_path, 'w') as f:
1028+
f.write('A')
1029+
work_repo.index.add(['a.txt'])
1030+
work_repo.index.commit('A')
1031+
1032+
d_path = os.path.join(work_dir, 'd.txt')
1033+
with open(d_path, 'w') as f:
1034+
f.write('D')
1035+
work_repo.index.add(['d.txt'])
1036+
end_commit = work_repo.index.commit('D')
1037+
1038+
work_repo.create_remote('origin', server_dir)
1039+
# Push objects to a temporary ref and delete it so server has objects but no refs
1040+
work_repo.remotes.origin.push(f'{end_commit.hexsha}:refs/tmp/hold')
1041+
Repo(server_dir).git.update_ref('-d', 'refs/tmp/hold')
1042+
1043+
update_details = f'{consts.EMPTY_COMMIT_SHA} {end_commit.hexsha} refs/heads/main'
1044+
result = calculate_pre_receive_commit_range(server_dir, update_details)
1045+
assert result == end_commit.hexsha
1046+
finally:
1047+
work_repo.close()
1048+
finally:
1049+
server_repo.close()

0 commit comments

Comments
 (0)