Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions .github/workflows/gh-pages.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,34 @@ jobs:
fetch-depth: 0
submodules: recursive

- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
- name: Install uv ${{ vars.UV_VERSION }} and python ${{ vars.DEFAULT_PYTHON_VERSION }}
uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1
with:
python-version: "3.11"
enable-cache: true
cache-dependency-glob: "uv.lock"
version: ${{ vars.UV_VERSION }}
python-version: ${{ vars.DEFAULT_PYTHON_VERSION }}

- name: Install Tox
run: pip install tox
- uses: ./.github/actions/get-changed-files
if: github.event_name == 'pull_request'
id: get-changed-files

- name: Detect changed forks
if: github.event_name == 'pull_request'
run: |
FILE_LIST="${{ steps.get-changed-files.outputs.file_list }}"
FORKS=$(grep '^src/ethereum/forks/' "$FILE_LIST" | cut -d/ -f4 | sort -u | paste -sd, - || true)

if [ -n "$FORKS" ] && ! grep -qv '^src/ethereum/forks/' "$FILE_LIST"; then
echo "DOCC_DIFF_FORKS=$FORKS" >> "$GITHUB_ENV"
echo "Building diffs for changed forks only: $FORKS"
else
echo "Non-fork files changed, building all diffs"
fi

- name: Build Documentation
run: |
tox -e spec-docs
uvx tox -e spec-docs
touch .tox/docs/.nojekyll

- name: Upload Pages Artifact
Expand Down
168 changes: 155 additions & 13 deletions src/ethereum_spec_tools/docc.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@

import dataclasses
import logging
import os
from collections import defaultdict
from itertools import tee, zip_longest
from pathlib import PurePath
from pathlib import Path, PurePath
from typing import (
Dict,
Final,
Expand Down Expand Up @@ -85,6 +86,25 @@ def __init__(self, config: PluginSettings) -> None:
forks = base / "forks"
self.forks = Hardfork.discover([str(forks)])

@staticmethod
def _diff_forks() -> Optional[FrozenSet[str]]:
"""
Return fork short names to limit diffing to, or None for all.

Controlled by the DOCC_DIFF_FORKS environment variable (comma-
separated fork short names). When set, only fork pairs where
at least one side is listed will be discovered.
"""
env = os.environ.get("DOCC_DIFF_FORKS", "").strip()
if not env:
return None
names = frozenset(
name.strip() for name in env.split(",") if name.strip()
)
if names:
logging.info("Limiting diffs to forks: %s", ", ".join(names))
return names or None

def discover(self, known: FrozenSet[T]) -> Iterator[Source]:
"""
Find sources.
Expand Down Expand Up @@ -121,8 +141,17 @@ def discover(self, known: FrozenSet[T]) -> Iterator[Source]:

by_fork[fork][fork_relative_path] = source

diff_forks = self._diff_forks()

diff_count = 0
for before, after in pairwise(self.forks):
if diff_forks is not None:
if (
before.short_name not in diff_forks
and after.short_name not in diff_forks
):
continue

paths = set(by_fork[before].keys()) | set(by_fork[after].keys())

for path in paths:
Expand Down Expand Up @@ -315,11 +344,44 @@ def __repr__(self) -> str:
)


class IdenticalDiffNode(DiffNode):
"""
A DiffNode where both sides have identical source content.

FixIndexTransform still rewrites identifiers, but
MinimizeDiffsTransform skips tree diffing entirely.
"""

pass


class EthereumBuilder(PythonBuilder):
"""
A `PythonBuilder` that additionally builds `Document`s from `DiffSource`s.
"""

@staticmethod
def _sources_identical(diff_source: "DiffSource[Source]") -> bool:
"""
Return True if both sides of a diff have identical file content.

Avoid AST parsing and tree diffing when the source
files are byte-for-byte the same.
"""
before = diff_source.before
after = diff_source.after

if before is None or after is None:
return False

before_path = getattr(before, "absolute_path", None)
after_path = getattr(after, "absolute_path", None)

if before_path is None or after_path is None:
return False

return Path(before_path).read_bytes() == Path(after_path).read_bytes()

def build(
self,
unprocessed: Set[Source],
Expand All @@ -335,17 +397,49 @@ def build(
source_set = set(s for s in unprocessed if isinstance(s, DiffSource))
unprocessed -= source_set

before_unprocessed = {s.before for s in source_set if s.before}
after_unprocessed = {s.after for s in source_set if s.after}
# Separate identical sources from changed sources to skip
# tree diffing for files that haven't changed.
changed: Set[DiffSource[Source]] = set()
identical: Set[DiffSource[Source]] = set()

for s in source_set:
if self._sources_identical(s):
identical.add(s)
else:
changed.add(s)

if identical:
logging.info(
"Skipping tree diff for %s identical file pair(s)",
len(identical),
)

before_unprocessed = {s.before for s in changed if s.before}
after_unprocessed = {s.after for s in changed if s.after}

# For identical pairs we only need the after tree.
after_identical = {s.after for s in identical if s.after}
after_unprocessed |= after_identical

logging.info(
"Building diff trees: %d changed, %d identical"
" (%d before + %d after sources)",
len(changed),
len(identical),
len(before_unprocessed),
len(after_unprocessed),
)

# Rebuild the sources so we get distinct tree objects.
before_processed: Dict[Source, Document] = dict()
after_processed: Dict[Source, Document] = dict()

super().build(before_unprocessed, before_processed)
logging.info("Built %d before trees", len(before_processed))
super().build(after_unprocessed, after_processed)
logging.info("Built %d after trees", len(after_processed))

for diff_source in source_set:
for diff_source in changed:
before: Node = BlankNode()
if diff_source.before:
before_document = before_processed[diff_source.before]
Expand All @@ -364,6 +458,25 @@ def build(
document = Document(root)
processed[diff_source] = document

# For identical pairs, wrap in IdenticalDiffNode so that
# FixIndexTransform rewrites identifiers correctly, but
# MinimizeDiffsTransform can skip the expensive tree diff.
for diff_source in identical:
after_node: Node = BlankNode()
if diff_source.after:
after_document = after_processed[diff_source.after]
del after_processed[diff_source.after]
after_node = AfterNode(after_document.root)

root = IdenticalDiffNode(
diff_source.before_name,
BeforeNode(BlankNode()),
diff_source.after_name,
after_node,
)
document = Document(root)
processed[diff_source] = document


class FixIndexTransform(Transform):
"""
Expand Down Expand Up @@ -1002,15 +1115,32 @@ class MinimizeDiffsTransform(Transform):
Move `DiffNode` nodes as far down the tree as reasonably possible.
"""

def __init__(self, settings: PluginSettings) -> None:
pass
_count: int

def __init__(self, settings: PluginSettings) -> None: # noqa: ARG002
self._count = 0

def transform(self, context: Context) -> None:
"""
Apply the transformation to the given document.
"""
self._count += 1
root = context[Document].root

kind = "skip"
if isinstance(root, IdenticalDiffNode):
kind = "identical"
elif isinstance(root, DiffNode):
kind = "diff"

logging.info(
"MinimizeDiffs [%d] (%s)",
self._count,
kind,
)

visitor = _MinimizeDiffsVisitor()
context[Document].root.visit(visitor)
root.visit(visitor)
assert visitor.root is not None
context[Document].root = visitor.root

Expand All @@ -1023,6 +1153,14 @@ def __init__(self) -> None:
self._stack = []
self.root = None

def _replace_node(self, node: Node, output: Node) -> None:
if 1 == len(self._stack):
self._stack[0] = output
self.root = output
else:
self._stack[-2].replace_child(node, output)
self._stack[-1] = output

def enter(self, node: Node) -> Visit:
self._stack.append(node)
if self.root is None:
Expand All @@ -1031,6 +1169,15 @@ def enter(self, node: Node) -> Visit:
if not isinstance(node, DiffNode):
return Visit.TraverseChildren

# Identical files: skip tree diff, use after content directly.
if isinstance(node, IdenticalDiffNode):
after = node.after
if isinstance(after, AfterNode):
self._replace_node(node, after.child)
else:
self._replace_node(node, BlankNode())
return Visit.SkipChildren

before = node.before
after = node.after

Expand All @@ -1054,12 +1201,7 @@ def enter(self, node: Node) -> Visit:
apply.root.visit(_HardenVisitor())
output = apply.output()

if 1 == len(self._stack):
self._stack[0] = output
self.root = output
else:
self._stack[-2].replace_child(node, output)
self._stack[-1] = output
self._replace_node(node, output)

return Visit.SkipChildren

Expand Down
3 changes: 3 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ commands =
[testenv:spec-docs]
description = Generate documentation for the specification code using docc
basepython = python3
# TODO: remove DOCC_DIFF_FORKS — temporary for benchmarking
setenv =
DOCC_DIFF_FORKS = amsterdam
commands =
docc --output "{toxworkdir}/docs"
python -c 'import pathlib; print("documentation available under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "docs" / "index.html"))'
Expand Down