diff --git a/.github/workflows/gh-pages.yaml b/.github/workflows/gh-pages.yaml index c2a90f8f635..15dc133dd82 100644 --- a/.github/workflows/gh-pages.yaml +++ b/.github/workflows/gh-pages.yaml @@ -31,17 +31,34 @@ jobs: fetch-depth: 0 submodules: recursive - - name: Setup Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 + - name: Install uv ${{ vars.UV_VERSION }} and python ${{ vars.DEFAULT_PYTHON_VERSION }} + uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1 with: - python-version: "3.11" + enable-cache: true + cache-dependency-glob: "uv.lock" + version: ${{ vars.UV_VERSION }} + python-version: ${{ vars.DEFAULT_PYTHON_VERSION }} - - name: Install Tox - run: pip install tox + - uses: ./.github/actions/get-changed-files + if: github.event_name == 'pull_request' + id: get-changed-files + + - name: Detect changed forks + if: github.event_name == 'pull_request' + run: | + FILE_LIST="${{ steps.get-changed-files.outputs.file_list }}" + FORKS=$(grep '^src/ethereum/forks/' "$FILE_LIST" | cut -d/ -f4 | sort -u | paste -sd, - || true) + + if [ -n "$FORKS" ] && ! grep -qv '^src/ethereum/forks/' "$FILE_LIST"; then + echo "DOCC_DIFF_FORKS=$FORKS" >> "$GITHUB_ENV" + echo "Building diffs for changed forks only: $FORKS" + else + echo "Non-fork files changed, building all diffs" + fi - name: Build Documentation run: | - tox -e spec-docs + uvx tox -e spec-docs touch .tox/docs/.nojekyll - name: Upload Pages Artifact diff --git a/src/ethereum_spec_tools/docc.py b/src/ethereum_spec_tools/docc.py index cc1623174aa..65561e3b2f2 100644 --- a/src/ethereum_spec_tools/docc.py +++ b/src/ethereum_spec_tools/docc.py @@ -19,9 +19,10 @@ import dataclasses import logging +import os from collections import defaultdict from itertools import tee, zip_longest -from pathlib import PurePath +from pathlib import Path, PurePath from typing import ( Dict, Final, @@ -85,6 +86,25 @@ def __init__(self, config: PluginSettings) -> None: forks = base / "forks" self.forks = Hardfork.discover([str(forks)]) + @staticmethod + def _diff_forks() -> Optional[FrozenSet[str]]: + """ + Return fork short names to limit diffing to, or None for all. + + Controlled by the DOCC_DIFF_FORKS environment variable (comma- + separated fork short names). When set, only fork pairs where + at least one side is listed will be discovered. + """ + env = os.environ.get("DOCC_DIFF_FORKS", "").strip() + if not env: + return None + names = frozenset( + name.strip() for name in env.split(",") if name.strip() + ) + if names: + logging.info("Limiting diffs to forks: %s", ", ".join(names)) + return names or None + def discover(self, known: FrozenSet[T]) -> Iterator[Source]: """ Find sources. @@ -121,8 +141,17 @@ def discover(self, known: FrozenSet[T]) -> Iterator[Source]: by_fork[fork][fork_relative_path] = source + diff_forks = self._diff_forks() + diff_count = 0 for before, after in pairwise(self.forks): + if diff_forks is not None: + if ( + before.short_name not in diff_forks + and after.short_name not in diff_forks + ): + continue + paths = set(by_fork[before].keys()) | set(by_fork[after].keys()) for path in paths: @@ -315,11 +344,44 @@ def __repr__(self) -> str: ) +class IdenticalDiffNode(DiffNode): + """ + A DiffNode where both sides have identical source content. + + FixIndexTransform still rewrites identifiers, but + MinimizeDiffsTransform skips tree diffing entirely. + """ + + pass + + class EthereumBuilder(PythonBuilder): """ A `PythonBuilder` that additionally builds `Document`s from `DiffSource`s. """ + @staticmethod + def _sources_identical(diff_source: "DiffSource[Source]") -> bool: + """ + Return True if both sides of a diff have identical file content. + + Avoid AST parsing and tree diffing when the source + files are byte-for-byte the same. + """ + before = diff_source.before + after = diff_source.after + + if before is None or after is None: + return False + + before_path = getattr(before, "absolute_path", None) + after_path = getattr(after, "absolute_path", None) + + if before_path is None or after_path is None: + return False + + return Path(before_path).read_bytes() == Path(after_path).read_bytes() + def build( self, unprocessed: Set[Source], @@ -335,17 +397,49 @@ def build( source_set = set(s for s in unprocessed if isinstance(s, DiffSource)) unprocessed -= source_set - before_unprocessed = {s.before for s in source_set if s.before} - after_unprocessed = {s.after for s in source_set if s.after} + # Separate identical sources from changed sources to skip + # tree diffing for files that haven't changed. + changed: Set[DiffSource[Source]] = set() + identical: Set[DiffSource[Source]] = set() + + for s in source_set: + if self._sources_identical(s): + identical.add(s) + else: + changed.add(s) + + if identical: + logging.info( + "Skipping tree diff for %s identical file pair(s)", + len(identical), + ) + + before_unprocessed = {s.before for s in changed if s.before} + after_unprocessed = {s.after for s in changed if s.after} + + # For identical pairs we only need the after tree. + after_identical = {s.after for s in identical if s.after} + after_unprocessed |= after_identical + + logging.info( + "Building diff trees: %d changed, %d identical" + " (%d before + %d after sources)", + len(changed), + len(identical), + len(before_unprocessed), + len(after_unprocessed), + ) # Rebuild the sources so we get distinct tree objects. before_processed: Dict[Source, Document] = dict() after_processed: Dict[Source, Document] = dict() super().build(before_unprocessed, before_processed) + logging.info("Built %d before trees", len(before_processed)) super().build(after_unprocessed, after_processed) + logging.info("Built %d after trees", len(after_processed)) - for diff_source in source_set: + for diff_source in changed: before: Node = BlankNode() if diff_source.before: before_document = before_processed[diff_source.before] @@ -364,6 +458,25 @@ def build( document = Document(root) processed[diff_source] = document + # For identical pairs, wrap in IdenticalDiffNode so that + # FixIndexTransform rewrites identifiers correctly, but + # MinimizeDiffsTransform can skip the expensive tree diff. + for diff_source in identical: + after_node: Node = BlankNode() + if diff_source.after: + after_document = after_processed[diff_source.after] + del after_processed[diff_source.after] + after_node = AfterNode(after_document.root) + + root = IdenticalDiffNode( + diff_source.before_name, + BeforeNode(BlankNode()), + diff_source.after_name, + after_node, + ) + document = Document(root) + processed[diff_source] = document + class FixIndexTransform(Transform): """ @@ -1002,15 +1115,32 @@ class MinimizeDiffsTransform(Transform): Move `DiffNode` nodes as far down the tree as reasonably possible. """ - def __init__(self, settings: PluginSettings) -> None: - pass + _count: int + + def __init__(self, settings: PluginSettings) -> None: # noqa: ARG002 + self._count = 0 def transform(self, context: Context) -> None: """ Apply the transformation to the given document. """ + self._count += 1 + root = context[Document].root + + kind = "skip" + if isinstance(root, IdenticalDiffNode): + kind = "identical" + elif isinstance(root, DiffNode): + kind = "diff" + + logging.info( + "MinimizeDiffs [%d] (%s)", + self._count, + kind, + ) + visitor = _MinimizeDiffsVisitor() - context[Document].root.visit(visitor) + root.visit(visitor) assert visitor.root is not None context[Document].root = visitor.root @@ -1023,6 +1153,14 @@ def __init__(self) -> None: self._stack = [] self.root = None + def _replace_node(self, node: Node, output: Node) -> None: + if 1 == len(self._stack): + self._stack[0] = output + self.root = output + else: + self._stack[-2].replace_child(node, output) + self._stack[-1] = output + def enter(self, node: Node) -> Visit: self._stack.append(node) if self.root is None: @@ -1031,6 +1169,15 @@ def enter(self, node: Node) -> Visit: if not isinstance(node, DiffNode): return Visit.TraverseChildren + # Identical files: skip tree diff, use after content directly. + if isinstance(node, IdenticalDiffNode): + after = node.after + if isinstance(after, AfterNode): + self._replace_node(node, after.child) + else: + self._replace_node(node, BlankNode()) + return Visit.SkipChildren + before = node.before after = node.after @@ -1054,12 +1201,7 @@ def enter(self, node: Node) -> Visit: apply.root.visit(_HardenVisitor()) output = apply.output() - if 1 == len(self._stack): - self._stack[0] = output - self.root = output - else: - self._stack[-2].replace_child(node, output) - self._stack[-1] = output + self._replace_node(node, output) return Visit.SkipChildren diff --git a/tox.ini b/tox.ini index f23707dc359..b8ac3f32185 100644 --- a/tox.ini +++ b/tox.ini @@ -194,6 +194,9 @@ commands = [testenv:spec-docs] description = Generate documentation for the specification code using docc basepython = python3 +# TODO: remove DOCC_DIFF_FORKS — temporary for benchmarking +setenv = + DOCC_DIFF_FORKS = amsterdam commands = docc --output "{toxworkdir}/docs" python -c 'import pathlib; print("documentation available under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "docs" / "index.html"))'