diff --git a/scripts/check_ecosystem.py b/scripts/check_ecosystem.py index 59ebb9a056833..ed7c876811c65 100755 --- a/scripts/check_ecosystem.py +++ b/scripts/check_ecosystem.py @@ -45,11 +45,11 @@ async def clone(self: Self, checkout_dir: Path) -> AsyncIterator[Path]: """Shallow clone this repository to a temporary directory.""" if checkout_dir.exists(): logger.debug(f"Reusing {self.org}:{self.repo}") - yield Path(checkout_dir) + yield await self._get_commit(checkout_dir) return logger.debug(f"Cloning {self.org}:{self.repo}") - git_command = [ + git_clone_command = [ "git", "clone", "--config", @@ -60,39 +60,50 @@ async def clone(self: Self, checkout_dir: Path) -> AsyncIterator[Path]: "--no-tags", ] if self.ref: - git_command.extend(["--branch", self.ref]) + git_clone_command.extend(["--branch", self.ref]) - git_command.extend( + git_clone_command.extend( [ f"https://github.com/{self.org}/{self.repo}", checkout_dir, ], ) - process = await create_subprocess_exec( - *git_command, + git_clone_process = await create_subprocess_exec( + *git_clone_command, env={"GIT_TERMINAL_PROMPT": "0"}, ) - status_code = await process.wait() + status_code = await git_clone_process.wait() logger.debug( f"Finished cloning {self.org}/{self.repo} with status {status_code}", ) + yield await self._get_commit(checkout_dir) - yield Path(checkout_dir) - - def url_for(self: Self, path: str, lnum: int | None = None) -> str: - """Return the GitHub URL for the given path and line number, if given.""" + def url_for(self: Self, commit_sha: str, path: str, lnum: int | None = None) -> str: + """ + Return the GitHub URL for the given commit, path, and line number, if given. + """ # Default to main branch - url = ( - f"https://github.com/{self.org}/{self.repo}" - f"/blob/{self.ref or 'main'}/{path}" - ) + url = f"https://github.com/{self.org}/{self.repo}/blob/{commit_sha}/{path}" if lnum: url += f"#L{lnum}" return url + async def _get_commit(self: Self, checkout_dir: Path) -> str: + """Return the commit sha for the repository in the checkout directory.""" + git_sha_process = await create_subprocess_exec( + *["git", "rev-parse", "HEAD"], + cwd=checkout_dir, + stdout=PIPE, + ) + git_sha_stdout, _ = await git_sha_process.communicate() + assert ( + await git_sha_process.wait() == 0 + ), f"Failed to retrieve commit sha at {checkout_dir}" + return git_sha_stdout.decode().strip() + REPOSITORIES: list[Repository] = [ Repository("apache", "airflow", "main", select="ALL"), @@ -169,6 +180,7 @@ class Diff(NamedTuple): removed: set[str] added: set[str] + source_sha: str def __bool__(self: Self) -> bool: """Return true if this diff is non-empty.""" @@ -203,13 +215,13 @@ async def compare( assert ":" not in repo.org assert ":" not in repo.repo checkout_dir = Path(checkout_parent).joinpath(f"{repo.org}:{repo.repo}") - async with repo.clone(checkout_dir) as path: + async with repo.clone(checkout_dir) as checkout_sha: try: async with asyncio.TaskGroup() as tg: check1 = tg.create_task( check( ruff=ruff1, - path=path, + path=checkout_dir, name=f"{repo.org}/{repo.repo}", select=repo.select, ignore=repo.ignore, @@ -220,7 +232,7 @@ async def compare( check2 = tg.create_task( check( ruff=ruff2, - path=path, + path=checkout_dir, name=f"{repo.org}/{repo.repo}", select=repo.select, ignore=repo.ignore, @@ -237,7 +249,7 @@ async def compare( elif line.startswith("+ "): added.add(line[2:]) - return Diff(removed, added) + return Diff(removed, added, checkout_sha) def read_projects_jsonl(projects_jsonl: Path) -> dict[tuple[str, str], Repository]: @@ -379,7 +391,7 @@ async def limited_parallelism(coroutine: T) -> T: continue pre, inner, path, lnum, post = match.groups() - url = repo.url_for(path, int(lnum)) + url = repo.url_for(diff.source_sha, path, int(lnum)) print(f"{pre} {inner} {post}") print("")