From e01a8004242a9e5f36107af1fb1c25d346dd9525 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 16 May 2025 03:05:50 +0000 Subject: [PATCH 01/18] Initial checkpoint --- Makefile | 2 +- pysetup/mk_to_spec.py | 513 ++++++++++++++++++++++++++++++++++++++++++ setup.py | 11 +- 3 files changed, 523 insertions(+), 3 deletions(-) create mode 100644 pysetup/mk_to_spec.py diff --git a/Makefile b/Makefile index 3366aca4e1..25be7bd587 100644 --- a/Makefile +++ b/Makefile @@ -275,4 +275,4 @@ kzg_setups: pyspec # Delete all untracked files. clean: - @git clean -fdx + rm -fR venv .mypy_cache build eth2spec.egg-info pysetup/__pycache__ pysetup/spec_builders/__pycache__ diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py new file mode 100644 index 0000000000..df97b86e3c --- /dev/null +++ b/pysetup/mk_to_spec.py @@ -0,0 +1,513 @@ +import ast +import json +from pathlib import Path +import string +from typing import Dict, Optional, Tuple +import re +from functools import lru_cache + + +from marko.block import BlankLine, Heading, FencedCode, HTMLBlock +from marko.ext.gfm import gfm +from marko.ext.gfm.elements import Table +from marko.inline import CodeSpan + +from .typing import ProtocolDefinition, VariableDefinition, SpecObject + +class MarkdownToSpec: + def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name: str): + self.file_name = file_name + self.preset = preset + self.config = config + self.preset_name = preset_name + + self.functions: Dict[str, str] = {} + self.protocols: Dict[str, ProtocolDefinition] = {} + self.constant_vars: Dict[str, VariableDefinition] = {} + self.preset_dep_constant_vars: Dict[str, VariableDefinition] = {} + self.preset_vars: Dict[str, VariableDefinition] = {} + self.config_vars: Dict[str, VariableDefinition] = {} + self.ssz_dep_constants: Dict[str, str] = {} + self.func_dep_presets: Dict[str, str] = {} + self.ssz_objects: Dict[str, str] = {} + self.dataclasses: Dict[str, str] = {} + self.all_custom_types: Dict[str, str] = {} + self.custom_types: Dict[str, str] = {} + self.preset_dep_custom_types: Dict[str, str] = {} + + self.document = None + self.document_iterator = None + self.current_name = None + self.should_skip = False + self.list_of_records = None + self.list_of_records_name = None + + def run(self) -> SpecObject: + """ + Orchestrates the parsing and processing of the markdown spec file. + - Calls _parse_document() + - Iterates over self.document.children and processes each child + - Calls _finalize_types() and _build_spec_object() after processing + Returns: + SpecObject: The constructed specification object. + """ + self._parse_document() + # self.document_iterator = iter(self.document.children) + # while (child := self._get_next_element()) is not None: + for child in self.document.children: + self._process_child(child) + self._finalize_types() + return self._build_spec_object() + + def _get_next_element(self): + """ + Returns the next element in the document. + If the end of the document is reached, returns None. + """ + + try: + # while isinstance(result := next(self.document_iterator), BlankLine): + # pass + # return result + next(self.document_iterator) + except StopIteration: + return None + + def _finalize_types(self): + """ + Processes all_custom_types into custom_types and preset_dep_custom_types. + Calls helper functions to update KZG and CURDLEPROOFS setups if needed. + """ + # Update KZG trusted setup if needed + if any('KZG_SETUP' in name for name in self.constant_vars): + _update_constant_vars_with_kzg_setups( + self.constant_vars, self.preset_dep_constant_vars, self.preset_name + ) + + # Update CURDLEPROOFS CRS if needed + if any('CURDLEPROOFS_CRS' in name for name in self.constant_vars): + _update_constant_vars_with_curdleproofs_crs( + self.constant_vars, self.preset_dep_constant_vars, self.preset_name + ) + + # Split all_custom_types into custom_types and preset_dep_custom_types + self.custom_types = {} + self.preset_dep_custom_types = {} + for name, value in self.all_custom_types.items(): + if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars): + self.preset_dep_custom_types[name] = value + else: + self.custom_types[name] = value + + def _parse_document(self): + """ + Opens the markdown file, parses its content into a document object using _parse_markdown, + and stores the parsed document in self.document. + """ + with open(self.file_name) as source_file: + self.document = parse_markdown(source_file.read()) + + def _process_child(self, child): + # Skip blank lines + if isinstance(child, BlankLine): + return + + if self.should_skip: + self.should_skip = False + return + + # Dispatch to the correct handler + if isinstance(child, Heading): + self._process_heading(child) + elif isinstance(child, FencedCode): + self._process_code_block(child) + elif isinstance(child, Table): + # Handler for list-of-records is managed by state in _process_html_block + if self.list_of_records is not None: + self._process_list_of_records_table(child) + else: + self._process_table(child) + elif isinstance(child, HTMLBlock): + self._process_html_block(child) + + def _process_heading(self, child): + """ + Extracts the section name from the heading and updates current_name for context. + """ + if not isinstance(child, Heading): + return + self.current_name = _get_name_from_heading(child) + # else: skip unknown types + + def _process_code_block(self, child): + """ + Processes a FencedCode block: + - Checks if the code block is Python. + - Extracts source code and determines if it is a function, dataclass, or class. + - Updates the appropriate dictionary (functions, protocols, dataclasses, ssz_objects). + """ + if child.lang != "python": + return + + source = _get_source_from_code_block(child) + + if source.startswith("def"): + self._process_code_def(source) + elif source.startswith("@dataclass"): + self._process_code_dataclass(source) + elif source.startswith("class"): + self._process_code_class(source) + else: + raise Exception("unrecognized python code element: " + source) + + def _process_code_def(self, source): + self.current_name = _get_function_name_from_source(source) + self_type_name = _get_self_type_from_source(source) + function_def = "\n".join(line.rstrip() for line in source.splitlines()) + if self_type_name is None: + self.functions[self.current_name] = function_def + else: + if self_type_name not in self.protocols: + self.protocols[self_type_name] = ProtocolDefinition( + functions={}) + self.protocols[self_type_name].functions[self.current_name] = function_def + + def _process_code_dataclass(self, source): + """ if self.current_name is None: + raise Exception(f"found @dataclass without a name: {source}")""" + self.dataclasses[self.current_name] = "\n".join( + line.rstrip() for line in source.splitlines()) + + def _process_code_class(self, source): + class_name, parent_class = _get_class_info_from_source(source) + # check consistency with spec + if class_name != self.current_name: + raise Exception( + f"class_name {class_name} != current_name {self.current_name}") + + if parent_class: + assert parent_class == "Container" + self.ssz_objects[self.current_name] = "\n".join( + line.rstrip() for line in source.splitlines()) + + def _process_table(self, child): + """ + Handles standard tables (not list-of-records). + Iterates over rows, extracting variable names, values, and descriptions. + Determines if the variable is a constant, preset, config, or custom type. + Updates the corresponding dictionaries. + Handles special cases for predefined types and function-dependent presets. + """ + + for row in child.children: + cells = row.children + if len(cells) >= 2: + name_cell = cells[0] + name = name_cell.children[0].children + + value_cell = cells[1] + value = value_cell.children[0].children + + description = None + if len(cells) >= 3: + description_cell = cells[2] + if len(description_cell.children) > 0: + description = description_cell.children[0].children + if isinstance(description, list): + description = description[0].children + + if isinstance(name, list): + name = name[0].children + if isinstance(value, list): + value = value[0].children + + # Skip types that have been defined elsewhere + if description is not None and description.startswith(""): + continue + + if not _is_constant_id(name): + # Check for short type declarations + if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")): + self.all_custom_types[name] = value + continue + + if value.startswith("get_generalized_index"): + self.ssz_dep_constants[name] = value + continue + + if description is not None and description.startswith(""): + self.func_dep_presets[name] = value + + value_def = _parse_value(name, value) + if name in self.preset: + if self.preset_name == "mainnet": + check_yaml_matches_spec( + name, self.preset, value_def) + self.preset_vars[name] = VariableDefinition( + value_def.type_name, self.preset[name], value_def.comment, None) + elif name in self.config: + if self.preset_name == "mainnet": + check_yaml_matches_spec( + name, self.config, value_def) + self.config_vars[name] = VariableDefinition( + value_def.type_name, self.config[name], value_def.comment, None) + else: + if name in ('ENDIANNESS', 'KZG_ENDIANNESS'): + # Deal with mypy Literal typing check + value_def = _parse_value( + name, value, type_hint='Final') + if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars): + self.preset_dep_constant_vars[name] = value_def + else: + self.constant_vars[name] = value_def + + def _process_list_of_records_table(self, child): + """ + Handles tables marked as 'list-of-records'. + Extracts headers and rows, mapping field names and types. + Applies type mapping to config entries. + Validates or updates the config variable as needed based on preset_name. + Updates config_vars with the processed list. + """ + + list_of_records_header = None + for i, row in enumerate(child.children): + if i == 0: + # Save the table header, used for field names (skip last item: description) + list_of_records_header = [ + re.sub(r'\s+', '_', value.children[0].children.upper()) + for value in row.children[:-1] + ] + else: + # Add the row entry to our list of records + self.list_of_records.append({ + list_of_records_header[j]: value.children[0].children + for j, value in enumerate(row.children[:-1]) + }) + + # Make a type map from the spec definition + type_map: dict[str, str] = {} + pattern = re.compile(r'^(\w+)\(.*\)$') + for entry in self.list_of_records: + for k, v in entry.items(): + m = pattern.match(v) + if m: + type_map[k] = m.group(1) + + # Apply the types to the file config + list_of_records_config: list[dict[str, str]] = [] + for entry in self.config[self.list_of_records_name]: + new_entry = {} + for k, v in entry.items(): + ctor = type_map.get(k) + if ctor: + new_entry[k] = f"{ctor}({v})" + else: + new_entry[k] = v + list_of_records_config.append(new_entry) + + # For mainnet, check that the spec config & file config are the same + if self.preset_name == "mainnet": + assert self.list_of_records == list_of_records_config, \ + f"list of records mismatch: {self.list_of_records} vs {list_of_records_config}" + elif self.preset_name == "minimal": + self.list_of_records = list_of_records_config + + # Set the config variable and reset the state + self.config_vars[self.list_of_records_name] = self.list_of_records + self.list_of_records = None + + def _process_html_block(self, child): + """ + Handles HTML comments for skip logic and list-of-records detection. + Sets flags or state variables for the next iteration. + """ + + body = child.body.strip() + if body == "": + self.should_skip = True + # Handle list-of-records tables + match = re.match( + r"", body) + if match: + self.list_of_records = [] + self.list_of_records_name = match.group(1).upper() + + def _build_spec_object(self): + """ + Constructs and returns the SpecObject using all collected data. + """ + return SpecObject( + functions=self.functions, + protocols=self.protocols, + custom_types=self.custom_types, + preset_dep_custom_types=self.preset_dep_custom_types, + constant_vars=self.constant_vars, + preset_dep_constant_vars=self.preset_dep_constant_vars, + preset_vars=self.preset_vars, + config_vars=self.config_vars, + ssz_dep_constants=self.ssz_dep_constants, + func_dep_presets=self.func_dep_presets, + ssz_objects=self.ssz_objects, + dataclasses=self.dataclasses, + ) + +@lru_cache(maxsize=None) +def _get_name_from_heading(heading: Heading) -> Optional[str]: + last_child = heading.children[-1] + if isinstance(last_child, CodeSpan): + return last_child.children + return None + + +@lru_cache(maxsize=None) +def _get_source_from_code_block(block: FencedCode) -> str: + return block.children[0].children.strip() + + +@lru_cache(maxsize=None) +def _get_function_name_from_source(source: str) -> str: + fn = ast.parse(source).body[0] + return fn.name + + +@lru_cache(maxsize=None) +def _get_self_type_from_source(source: str) -> Optional[str]: + fn = ast.parse(source).body[0] + args = fn.args.args + if len(args) == 0: + return None + if args[0].arg != 'self': + return None + if args[0].annotation is None: + return None + return args[0].annotation.id + + +@lru_cache(maxsize=None) +def _get_class_info_from_source(source: str) -> Tuple[str, Optional[str]]: + class_def = ast.parse(source).body[0] + base = class_def.bases[0] + if isinstance(base, ast.Name): + parent_class = base.id + elif isinstance(base, ast.Subscript): + parent_class = base.value.id + else: + # NOTE: SSZ definition derives from earlier phase... + # e.g. `phase0.SignedBeaconBlock` + # TODO: check for consistency with other phases + parent_class = None + return class_def.name, parent_class + + +@lru_cache(maxsize=None) +def _is_constant_id(name: str) -> bool: + if name[0] not in string.ascii_uppercase + '_': + return False + return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:])) + +@lru_cache(maxsize=None) +def _load_kzg_trusted_setups(preset_name): + trusted_setups_file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/trusted_setup_4096.json' + + with open(trusted_setups_file_path, 'r') as f: + json_data = json.load(f) + trusted_setup_G1_monomial = json_data['g1_monomial'] + trusted_setup_G1_lagrange = json_data['g1_lagrange'] + trusted_setup_G2_monomial = json_data['g2_monomial'] + + return trusted_setup_G1_monomial, trusted_setup_G1_lagrange, trusted_setup_G2_monomial + +@lru_cache(maxsize=None) +def _load_curdleproofs_crs(preset_name): + """ + NOTE: File generated from https://github.com/asn-d6/curdleproofs/blob/8e8bf6d4191fb6a844002f75666fb7009716319b/tests/crs.rs#L53-L67 + """ + file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/curdleproofs_crs.json' + + with open(file_path, 'r') as f: + json_data = json.load(f) + + return json_data + + +ALL_KZG_SETUPS = { + 'minimal': _load_kzg_trusted_setups('minimal'), + 'mainnet': _load_kzg_trusted_setups('mainnet') +} + +ALL_CURDLEPROOFS_CRS = { + 'minimal': _load_curdleproofs_crs('minimal'), + 'mainnet': _load_curdleproofs_crs('mainnet'), +} + +@lru_cache(maxsize=None) +def _parse_value(name: str, typed_value: str, type_hint: Optional[str] = None) -> VariableDefinition: + comment = None + if name in ("ROOT_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_REDUCED"): + comment = "noqa: E501" + + typed_value = typed_value.strip() + if '(' not in typed_value: + return VariableDefinition(type_name=None, value=typed_value, comment=comment, type_hint=type_hint) + i = typed_value.index('(') + type_name = typed_value[:i] + + return VariableDefinition(type_name=type_name, value=typed_value[i+1:-1], comment=comment, type_hint=type_hint) + + +def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name): + comment = "noqa: E501" + kzg_setups = ALL_KZG_SETUPS[preset_name] + preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'] = VariableDefinition( + preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'].value, + str(kzg_setups[0]), + comment, None + ) + preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'] = VariableDefinition( + preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'].value, + str(kzg_setups[1]), + comment, None + ) + constant_vars['KZG_SETUP_G2_MONOMIAL'] = VariableDefinition( + constant_vars['KZG_SETUP_G2_MONOMIAL'].value, + str(kzg_setups[2]), + comment, None + ) + + +def _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_constant_vars, preset_name): + comment = "noqa: E501" + constant_vars['CURDLEPROOFS_CRS'] = VariableDefinition( + None, + 'curdleproofs.CurdleproofsCrs.from_json(json.dumps(' + str(ALL_CURDLEPROOFS_CRS[str(preset_name)]).replace('0x', '') + '))', + comment, None + ) + + +@lru_cache(maxsize=None) +def parse_markdown(content: str): + return gfm.parse(content) + + +def check_yaml_matches_spec(var_name, yaml, value_def): + """ + This function performs a sanity check for presets & configs. To a certain degree, it ensures + that the values in the specifications match those in the yaml files. + """ + if var_name == "TERMINAL_BLOCK_HASH": + # This is just Hash32() in the specs, that's fine + return + + # We use a var in the definition of a new var, replace usages + # Reverse sort so that overridden values come first + updated_value = value_def.value + for var in sorted(yaml.keys(), reverse=True): + if var in updated_value: + updated_value = updated_value.replace(var, yaml[var]) + try: + assert yaml[var_name] == repr(eval(updated_value)), \ + f"mismatch for {var_name}: {yaml[var_name]} vs {eval(updated_value)}" + except NameError: + # Okay it's probably something more serious, let's ignore + pass diff --git a/setup.py b/setup.py index 3a3b27be75..afefe7b565 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ from setuptools import setup, find_packages, Command from setuptools.command.build_py import build_py from typing import Dict, List, Sequence, Optional, Tuple +from deepdiff import DeepDiff pysetup_path = os.path.abspath(os.path.dirname(__file__)) sys.path.insert(0, pysetup_path) @@ -46,7 +47,7 @@ SpecObject, VariableDefinition, ) - +from pysetup.mk_to_spec import MarkdownToSpec # Ignore '1.5.0-alpha.*' to '1.5.0a*' messages. warnings.filterwarnings('ignore', message='Normalizing .* to .*') @@ -220,6 +221,8 @@ def check_yaml_matches_spec(var_name, yaml, value_def): # Okay it's probably something more serious, let's ignore pass +def get_spec_new(file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name=str) -> SpecObject: + return MarkdownToSpec(file_name, preset, config, preset_name).run() def get_spec(file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name=str) -> SpecObject: functions: Dict[str, str] = {} @@ -477,7 +480,10 @@ def build_spec(fork: str, config_file: Path) -> str: preset = load_preset(tuple(preset_files)) config = load_config(config_file) - all_specs = [get_spec(spec, preset, config, preset_name) for spec in source_files] + all_specs = [get_spec_new(spec, preset, config, preset_name) for spec in source_files] + all_specs_old = [get_spec(spec, preset, config, preset_name) for spec in source_files] + + assert DeepDiff(all_specs, all_specs_old, ignore_order=True) == {}, f"specs differ: {DeepDiff(all_specs, all_specs_old, ignore_order=True)}" spec_object = all_specs[0] for value in all_specs[1:]: @@ -676,3 +682,4 @@ def run(self): py_modules=["eth2spec"], cmdclass=commands, ) + From f0adad625313ed922457f3a27b6930c8f2a79d2c Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 16 May 2025 16:13:20 +0000 Subject: [PATCH 02/18] Second checkpoint --- pyproject.toml | 1 + pysetup/mk_to_spec.py | 490 +++++++++++++++++++++++++----------------- setup.py | 4 +- 3 files changed, 296 insertions(+), 199 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 01167d9053..b310ea598b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,7 @@ requires = [ "ruamel.yaml==0.18.10", "setuptools==80.4.0", "wheel==0.45.1", + "deepdiff==8.5.0", ] [project] diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py index df97b86e3c..c17689bb4e 100644 --- a/pysetup/mk_to_spec.py +++ b/pysetup/mk_to_spec.py @@ -2,12 +2,13 @@ import json from pathlib import Path import string -from typing import Dict, Optional, Tuple +from typing import Dict, Optional, Tuple, Iterator import re from functools import lru_cache -from marko.block import BlankLine, Heading, FencedCode, HTMLBlock +from marko.block import BlankLine, Heading, FencedCode, HTMLBlock, Document +from marko.element import Element from marko.ext.gfm import gfm from marko.ext.gfm.elements import Table from marko.inline import CodeSpan @@ -16,7 +17,6 @@ class MarkdownToSpec: def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name: str): - self.file_name = file_name self.preset = preset self.config = config self.preset_name = preset_name @@ -35,12 +35,8 @@ def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, st self.custom_types: Dict[str, str] = {} self.preset_dep_custom_types: Dict[str, str] = {} - self.document = None - self.document_iterator = None - self.current_name = None - self.should_skip = False - self.list_of_records = None - self.list_of_records_name = None + self.document_iterator: Iterator[Element] = self._parse_document(file_name) + self.current_heading_name: str | None = None def run(self) -> SpecObject: """ @@ -51,146 +47,135 @@ def run(self) -> SpecObject: Returns: SpecObject: The constructed specification object. """ - self._parse_document() - # self.document_iterator = iter(self.document.children) - # while (child := self._get_next_element()) is not None: - for child in self.document.children: + while (child := self._get_next_element()) is not None: self._process_child(child) self._finalize_types() return self._build_spec_object() - def _get_next_element(self): + def _get_next_element(self) -> Optional[Element]: """ Returns the next element in the document. If the end of the document is reached, returns None. """ try: - # while isinstance(result := next(self.document_iterator), BlankLine): - # pass - # return result - next(self.document_iterator) + while isinstance(result := next(self.document_iterator), BlankLine): + pass + return result except StopIteration: return None - def _finalize_types(self): + def _skip_element(self) -> None: """ - Processes all_custom_types into custom_types and preset_dep_custom_types. - Calls helper functions to update KZG and CURDLEPROOFS setups if needed. + Skips the current element in the document. + This is a placeholder for future functionality. """ - # Update KZG trusted setup if needed - if any('KZG_SETUP' in name for name in self.constant_vars): - _update_constant_vars_with_kzg_setups( - self.constant_vars, self.preset_dep_constant_vars, self.preset_name - ) - - # Update CURDLEPROOFS CRS if needed - if any('CURDLEPROOFS_CRS' in name for name in self.constant_vars): - _update_constant_vars_with_curdleproofs_crs( - self.constant_vars, self.preset_dep_constant_vars, self.preset_name - ) + self._get_next_element() - # Split all_custom_types into custom_types and preset_dep_custom_types - self.custom_types = {} - self.preset_dep_custom_types = {} - for name, value in self.all_custom_types.items(): - if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars): - self.preset_dep_custom_types[name] = value - else: - self.custom_types[name] = value + - def _parse_document(self): + def _parse_document(self, file_name: Path) -> Iterator[Element]: """ Opens the markdown file, parses its content into a document object using _parse_markdown, and stores the parsed document in self.document. """ - with open(self.file_name) as source_file: - self.document = parse_markdown(source_file.read()) + with open(file_name) as source_file: + document = parse_markdown(source_file.read()) + return iter(document.children) - def _process_child(self, child): + def _process_child(self, child: Element): # Skip blank lines if isinstance(child, BlankLine): return - if self.should_skip: - self.should_skip = False - return - - # Dispatch to the correct handler - if isinstance(child, Heading): - self._process_heading(child) - elif isinstance(child, FencedCode): - self._process_code_block(child) - elif isinstance(child, Table): - # Handler for list-of-records is managed by state in _process_html_block - if self.list_of_records is not None: - self._process_list_of_records_table(child) - else: + # Dispatch to the correct handler + match child: + case Heading(): + self._process_heading(child) + case FencedCode(): + self._process_code_block(child) + case Table(): self._process_table(child) - elif isinstance(child, HTMLBlock): - self._process_html_block(child) + case HTMLBlock(): + self._process_html_block(child) - def _process_heading(self, child): + def _process_heading(self, heading: Heading): """ Extracts the section name from the heading and updates current_name for context. """ - if not isinstance(child, Heading): + if not isinstance(heading, Heading): return - self.current_name = _get_name_from_heading(child) + self.current_heading_name = _get_name_from_heading(heading) # else: skip unknown types - def _process_code_block(self, child): + def _process_code_block(self, code_block: FencedCode): """ Processes a FencedCode block: - Checks if the code block is Python. - Extracts source code and determines if it is a function, dataclass, or class. - Updates the appropriate dictionary (functions, protocols, dataclasses, ssz_objects). """ - if child.lang != "python": + if code_block.lang != "python": return - source = _get_source_from_code_block(child) + source = _get_source_from_code_block(code_block) + module = ast.parse(source) - if source.startswith("def"): - self._process_code_def(source) - elif source.startswith("@dataclass"): - self._process_code_dataclass(source) - elif source.startswith("class"): - self._process_code_class(source) + clean_source = "\n".join(line.rstrip() for line in source.splitlines()) + # AST container of the first definition in the block + first_def = module.body[0] + + if isinstance(first_def, ast.FunctionDef): + self._process_code_def(clean_source, first_def) + elif isinstance(first_def, ast.ClassDef) and _has_decorator(first_def, "dataclass"): + self._add_dataclass(clean_source, first_def) + elif isinstance(first_def, ast.ClassDef): + self._process_code_class(clean_source, first_def) else: raise Exception("unrecognized python code element: " + source) - def _process_code_def(self, source): - self.current_name = _get_function_name_from_source(source) - self_type_name = _get_self_type_from_source(source) - function_def = "\n".join(line.rstrip() for line in source.splitlines()) + def _process_code_def(self, source: str, fn: ast.FunctionDef): + """ + Processes a function definition node from the AST and stores its source code representation. + If the function is a method (i.e., has a self type), it is added to the protocol functions for that type. + Otherwise, it is stored as a standalone function. + Args: + source (str): The source code of the function definition. + fn (ast.FunctionDef): The AST node representing the function definition. + """ + + self_type_name = _get_self_type_from_source(fn) + if self_type_name is None: - self.functions[self.current_name] = function_def + self.functions[fn.name] = source else: - if self_type_name not in self.protocols: - self.protocols[self_type_name] = ProtocolDefinition( - functions={}) - self.protocols[self_type_name].functions[self.current_name] = function_def - - def _process_code_dataclass(self, source): - """ if self.current_name is None: - raise Exception(f"found @dataclass without a name: {source}")""" - self.dataclasses[self.current_name] = "\n".join( - line.rstrip() for line in source.splitlines()) - - def _process_code_class(self, source): - class_name, parent_class = _get_class_info_from_source(source) + self._add_protocol_function(self_type_name, fn.name, source) + + def _add_protocol_function(self, protocol_name: str, function_name: str, function_def: str): + """ + Adds a function definition to the protocol functions dictionary. + """ + + if protocol_name not in self.protocols: + self.protocols[protocol_name] = ProtocolDefinition( + functions={}) + self.protocols[protocol_name].functions[function_name] = function_def + + def _add_dataclass(self, source, cls: ast.ClassDef): + self.dataclasses[cls.name] = source + + def _process_code_class(self, source, cls: ast.ClassDef): + class_name, parent_class = _get_class_info_from_ast(cls) + # check consistency with spec - if class_name != self.current_name: - raise Exception( - f"class_name {class_name} != current_name {self.current_name}") + if class_name != self.current_heading_name: + raise Exception(f"class_name {class_name} != current_name {self.current_heading_name}") if parent_class: assert parent_class == "Container" - self.ssz_objects[self.current_name] = "\n".join( - line.rstrip() for line in source.splitlines()) + self.ssz_objects[class_name] = source - def _process_table(self, child): + def _process_table(self, child: HTMLBlock): """ Handles standard tables (not list-of-records). Iterates over rows, extracting variable names, values, and descriptions. @@ -200,103 +185,170 @@ def _process_table(self, child): """ for row in child.children: - cells = row.children - if len(cells) >= 2: - name_cell = cells[0] - name = name_cell.children[0].children - - value_cell = cells[1] - value = value_cell.children[0].children - - description = None - if len(cells) >= 3: - description_cell = cells[2] - if len(description_cell.children) > 0: - description = description_cell.children[0].children - if isinstance(description, list): - description = description[0].children - - if isinstance(name, list): - name = name[0].children - if isinstance(value, list): - value = value[0].children - - # Skip types that have been defined elsewhere - if description is not None and description.startswith(""): - continue - - if not _is_constant_id(name): - # Check for short type declarations - if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")): - self.all_custom_types[name] = value - continue - - if value.startswith("get_generalized_index"): - self.ssz_dep_constants[name] = value - continue - - if description is not None and description.startswith(""): - self.func_dep_presets[name] = value - - value_def = _parse_value(name, value) - if name in self.preset: - if self.preset_name == "mainnet": - check_yaml_matches_spec( - name, self.preset, value_def) - self.preset_vars[name] = VariableDefinition( - value_def.type_name, self.preset[name], value_def.comment, None) - elif name in self.config: - if self.preset_name == "mainnet": - check_yaml_matches_spec( - name, self.config, value_def) - self.config_vars[name] = VariableDefinition( - value_def.type_name, self.config[name], value_def.comment, None) + if len(row.children) < 2: + continue + + name, value, description = self._get_table_row_fields(row) + + # Skip types that have been defined elsewhere + if description is not None and description.startswith(""): + continue + + # If it is not a constant, check if it is a custom type + if not _is_constant_id(name): + # Check for short type declarations + if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")): + self.all_custom_types[name] = value + continue + + # It is a constant name and a generalized index + if value.startswith("get_generalized_index"): + self.ssz_dep_constants[name] = value + continue + + # It is a constant and not a generalized index, and a function-dependent preset + if description is not None and description.startswith(""): + self.func_dep_presets[name] = value + + # It is a constant and not a generalized index + value_def = _parse_value(name, value) + # It is a preset + if name in self.preset: + if self.preset_name == "mainnet": + check_yaml_matches_spec(name, self.preset, value_def) + + self.preset_vars[name] = VariableDefinition(value_def.type_name, self.preset[name], value_def.comment, None) + + # It is a config variable + elif name in self.config: + if self.preset_name == "mainnet": + check_yaml_matches_spec(name, self.config, value_def) + + self.config_vars[name] = VariableDefinition(value_def.type_name, self.config[name], value_def.comment, None) + + # It is a constant variable or a preset_dep_constant_vars + else: + if name in ('ENDIANNESS', 'KZG_ENDIANNESS'): + # Deal with mypy Literal typing check + value_def = _parse_value(name, value, type_hint='Final') + if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars): + self.preset_dep_constant_vars[name] = value_def else: - if name in ('ENDIANNESS', 'KZG_ENDIANNESS'): - # Deal with mypy Literal typing check - value_def = _parse_value( - name, value, type_hint='Final') - if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars): - self.preset_dep_constant_vars[name] = value_def - else: - self.constant_vars[name] = value_def + self.constant_vars[name] = value_def + + @staticmethod + def _get_table_row_fields(row: Element) -> tuple[str, str, Optional[str]]: + """ + Extracts the name, value, and description fields from a table row element. + Description can be None. + """ + cells = row.children + name_cell = cells[0] + name = name_cell.children[0].children + + value_cell = cells[1] + value = value_cell.children[0].children + + if isinstance(name, list): + name = name[0].children + if isinstance(value, list): + value = value[0].children + + description = None + if len(cells) >= 3: + description_cell = cells[2] + if len(description_cell.children) > 0: + description = description_cell.children[0].children + if isinstance(description, list): + description = description[0].children - def _process_list_of_records_table(self, child): + return name, value, description + + def _process_list_of_records_table(self, child, list_of_records_name): """ Handles tables marked as 'list-of-records'. Extracts headers and rows, mapping field names and types. Applies type mapping to config entries. Validates or updates the config variable as needed based on preset_name. Updates config_vars with the processed list. - """ - list_of_records_header = None - for i, row in enumerate(child.children): - if i == 0: - # Save the table header, used for field names (skip last item: description) - list_of_records_header = [ - re.sub(r'\s+', '_', value.children[0].children.upper()) - for value in row.children[:-1] - ] - else: - # Add the row entry to our list of records - self.list_of_records.append({ - list_of_records_header[j]: value.children[0].children - for j, value in enumerate(row.children[:-1]) - }) + Example of input: + | Epoch | Max Blobs Per Block | Description | + | --------------------------- | ------------------- | -------------------------------- | + | `Epoch(269568)` **Deneb** | `uint64(6)` | The limit is set to `6` blobs | + | `Epoch(364032)` **Electra** | `uint64(9)` | The limit is raised to `9` blobs | + + The method _process_html_block calls this method when it encounters a comment + of the form ``. + """ + list_of_records_spec = self._extract_list_of_records_spec(child) # Make a type map from the spec definition + type_map = self._make_list_of_records_type_map(list_of_records_spec) + + # Apply the types to the file config + list_of_records_config_file = self._extract_typed_records_config( + list_of_records_name, type_map + ) + + # For mainnet, check that the spec config & file config are the same + # For minimal, we expect this to be different; just use the file config + if self.preset_name == "mainnet": + assert list_of_records_spec == list_of_records_config_file, \ + f"list of records mismatch: {list_of_records_spec} vs {list_of_records_config_file}" + + # Set the config variable + self.config_vars[list_of_records_name] = list_of_records_config_file + + @staticmethod + def _make_list_of_records_type_map(list_of_records: list[dict[str, str]]) -> dict[str, str]: + """ + Given a list of records (each a dict of field name to value), extract a mapping + from field name to type name, based on values of the form 'TypeName(...)'. + """ type_map: dict[str, str] = {} pattern = re.compile(r'^(\w+)\(.*\)$') - for entry in self.list_of_records: + for entry in list_of_records: for k, v in entry.items(): m = pattern.match(v) if m: type_map[k] = m.group(1) + return type_map - # Apply the types to the file config - list_of_records_config: list[dict[str, str]] = [] - for entry in self.config[self.list_of_records_name]: + @staticmethod + def _extract_list_of_records_spec(child) -> list[dict[str, str]]: + """ + Extracts the list of records from a table element. + Returns a list of dicts, each representing a row with field names as keys. + """ + + # Save the table header, used for field names (skip last item: description) + header_row = child.children[0] + list_of_records_spec_header = [ + re.sub(r'\s+', '_', value.children[0].children.upper()) + for value in header_row.children[:-1] + ] + + # Process the remaining rows + list_of_records_spec: list[dict[str, str]] = [ + { + list_of_records_spec_header[j]: value.children[0].children + for j, value in enumerate(row.children[:-1]) + } + for row in child.children[1:] + ] + + return list_of_records_spec + + def _extract_typed_records_config( + self, list_of_records_name: str, type_map: dict[str, str] + ) -> list[dict[str, str]]: + """ + Applies type constructors to config entries based on the type map. + Returns a new list of dicts with types applied. + """ + list_of_records_config_file: list[dict[str, str]] = [] + for entry in self.config[list_of_records_name]: new_entry = {} for k, v in entry.items(): ctor = type_map.get(k) @@ -304,18 +356,8 @@ def _process_list_of_records_table(self, child): new_entry[k] = f"{ctor}({v})" else: new_entry[k] = v - list_of_records_config.append(new_entry) - - # For mainnet, check that the spec config & file config are the same - if self.preset_name == "mainnet": - assert self.list_of_records == list_of_records_config, \ - f"list of records mismatch: {self.list_of_records} vs {list_of_records_config}" - elif self.preset_name == "minimal": - self.list_of_records = list_of_records_config - - # Set the config variable and reset the state - self.config_vars[self.list_of_records_name] = self.list_of_records - self.list_of_records = None + list_of_records_config_file.append(new_entry) + return list_of_records_config_file def _process_html_block(self, child): """ @@ -324,14 +366,48 @@ def _process_html_block(self, child): """ body = child.body.strip() + + # This comment marks that we should skip the next element if body == "": - self.should_skip = True + self._skip_element() + # Handle list-of-records tables + # This comment marks that the next table is a list-of-records + # e.g. match = re.match( r"", body) if match: - self.list_of_records = [] - self.list_of_records_name = match.group(1).upper() + table_element = self._get_next_element() + if not isinstance(table_element, Table): + raise Exception( + f"expected table after list-of-records comment, got {type(table_element)}") + self._process_list_of_records_table(table_element, match.group(1).upper()) + + def _finalize_types(self): + """ + Processes all_custom_types into custom_types and preset_dep_custom_types. + Calls helper functions to update KZG and CURDLEPROOFS setups if needed. + """ + # Update KZG trusted setup if needed + if any('KZG_SETUP' in name for name in self.constant_vars): + _update_constant_vars_with_kzg_setups( + self.constant_vars, self.preset_dep_constant_vars, self.preset_name + ) + + # Update CURDLEPROOFS CRS if needed + if any('CURDLEPROOFS_CRS' in name for name in self.constant_vars): + _update_constant_vars_with_curdleproofs_crs( + self.constant_vars, self.preset_dep_constant_vars, self.preset_name + ) + + # Split all_custom_types into custom_types and preset_dep_custom_types + self.custom_types = {} + self.preset_dep_custom_types = {} + for name, value in self.all_custom_types.items(): + if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars): + self.preset_dep_custom_types[name] = value + else: + self.custom_types[name] = value def _build_spec_object(self): """ @@ -368,12 +444,13 @@ def _get_source_from_code_block(block: FencedCode) -> str: @lru_cache(maxsize=None) def _get_function_name_from_source(source: str) -> str: fn = ast.parse(source).body[0] + if not isinstance(fn, ast.FunctionDef): + raise Exception("expected function definition") return fn.name @lru_cache(maxsize=None) -def _get_self_type_from_source(source: str) -> Optional[str]: - fn = ast.parse(source).body[0] +def _get_self_type_from_source(fn: ast.FunctionDef) -> Optional[str]: args = fn.args.args if len(args) == 0: return None @@ -385,9 +462,8 @@ def _get_self_type_from_source(source: str) -> Optional[str]: @lru_cache(maxsize=None) -def _get_class_info_from_source(source: str) -> Tuple[str, Optional[str]]: - class_def = ast.parse(source).body[0] - base = class_def.bases[0] +def _get_class_info_from_ast(cls: ast.ClassDef) -> Tuple[str, Optional[str]]: + base = cls.bases[0] if isinstance(base, ast.Name): parent_class = base.id elif isinstance(base, ast.Subscript): @@ -397,11 +473,22 @@ def _get_class_info_from_source(source: str) -> Tuple[str, Optional[str]]: # e.g. `phase0.SignedBeaconBlock` # TODO: check for consistency with other phases parent_class = None - return class_def.name, parent_class + return cls.name, parent_class @lru_cache(maxsize=None) def _is_constant_id(name: str) -> bool: + """ + Check if the given name follows the convention for constant identifiers. + A valid constant identifier must: + - Start with an uppercase ASCII letter or an underscore ('_'). + - All subsequent characters (if any) must be uppercase ASCII letters, underscores, or digits. + Args: + name (str): The identifier name to check. + Returns: + bool: True if the name is a valid constant identifier, False otherwise. + """ + if name[0] not in string.ascii_uppercase + '_': return False return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:])) @@ -486,7 +573,7 @@ def _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_consta @lru_cache(maxsize=None) -def parse_markdown(content: str): +def parse_markdown(content: str) -> Document: return gfm.parse(content) @@ -511,3 +598,12 @@ def check_yaml_matches_spec(var_name, yaml, value_def): except NameError: # Okay it's probably something more serious, let's ignore pass + +def _has_decorator(decorateable: ast.expr, name: str) -> bool: + return any(_is_decorator(d, name) for d in decorateable.decorator_list) + +def _is_decorator(decorator: ast.expr, name: str) -> bool: + return (isinstance(decorator, ast.Name) and decorator.id == name) or \ + (isinstance(decorator, ast.Attribute) and decorator.attr == name) or \ + (isinstance(decorator, ast.Call) and decorator.func.id == name) or \ + (isinstance(decorator, ast.Subscript) and decorator.value.id == name) diff --git a/setup.py b/setup.py index afefe7b565..221d20f8bc 100644 --- a/setup.py +++ b/setup.py @@ -267,7 +267,7 @@ def get_spec(file_name: Path, preset: Dict[str, str], config: Dict[str, str], pr protocols[self_type_name] = ProtocolDefinition(functions={}) protocols[self_type_name].functions[current_name] = function_def elif source.startswith("@dataclass"): - dataclasses[current_name] = "\n".join(line.rstrip() for line in source.splitlines()) + dataclasses[ast.parse(source).body[0].name] = "\n".join(line.rstrip() for line in source.splitlines()) elif source.startswith("class"): class_name, parent_class = _get_class_info_from_source(source) # check consistency with spec @@ -482,7 +482,7 @@ def build_spec(fork: str, config = load_config(config_file) all_specs = [get_spec_new(spec, preset, config, preset_name) for spec in source_files] all_specs_old = [get_spec(spec, preset, config, preset_name) for spec in source_files] - + assert DeepDiff(all_specs, all_specs_old, ignore_order=True) == {}, f"specs differ: {DeepDiff(all_specs, all_specs_old, ignore_order=True)}" spec_object = all_specs[0] From 7afaf9f23bdb56000b117692e96e9d1069e738b7 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Sun, 18 May 2025 10:22:13 +0000 Subject: [PATCH 03/18] Third checkpoint --- pysetup/mk_to_spec.py | 110 +++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py index c17689bb4e..feb914f5da 100644 --- a/pysetup/mk_to_spec.py +++ b/pysetup/mk_to_spec.py @@ -21,19 +21,23 @@ def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, st self.config = config self.preset_name = preset_name - self.functions: Dict[str, str] = {} - self.protocols: Dict[str, ProtocolDefinition] = {} - self.constant_vars: Dict[str, VariableDefinition] = {} - self.preset_dep_constant_vars: Dict[str, VariableDefinition] = {} - self.preset_vars: Dict[str, VariableDefinition] = {} - self.config_vars: Dict[str, VariableDefinition] = {} - self.ssz_dep_constants: Dict[str, str] = {} - self.func_dep_presets: Dict[str, str] = {} - self.ssz_objects: Dict[str, str] = {} - self.dataclasses: Dict[str, str] = {} + # Use a single dict to hold all SpecObject fields + self.spec = { + "functions": {}, + "protocols": {}, + "custom_types": {}, + "preset_dep_custom_types": {}, + "constant_vars": {}, + "preset_dep_constant_vars": {}, + "preset_vars": {}, + "config_vars": {}, + "ssz_dep_constants": {}, + "func_dep_presets": {}, + "ssz_objects": {}, + "dataclasses": {}, + } + self.all_custom_types: Dict[str, str] = {} - self.custom_types: Dict[str, str] = {} - self.preset_dep_custom_types: Dict[str, str] = {} self.document_iterator: Iterator[Element] = self._parse_document(file_name) self.current_heading_name: str | None = None @@ -68,7 +72,6 @@ def _get_next_element(self) -> Optional[Element]: def _skip_element(self) -> None: """ Skips the current element in the document. - This is a placeholder for future functionality. """ self._get_next_element() @@ -101,12 +104,9 @@ def _process_child(self, child: Element): def _process_heading(self, heading: Heading): """ - Extracts the section name from the heading and updates current_name for context. + Extracts the section name from the heading and updates current_heading_name for context. """ - if not isinstance(heading, Heading): - return self.current_heading_name = _get_name_from_heading(heading) - # else: skip unknown types def _process_code_block(self, code_block: FencedCode): """ @@ -119,9 +119,9 @@ def _process_code_block(self, code_block: FencedCode): return source = _get_source_from_code_block(code_block) - module = ast.parse(source) - clean_source = "\n".join(line.rstrip() for line in source.splitlines()) + + module = ast.parse(source) # AST container of the first definition in the block first_def = module.body[0] @@ -147,7 +147,7 @@ def _process_code_def(self, source: str, fn: ast.FunctionDef): self_type_name = _get_self_type_from_source(fn) if self_type_name is None: - self.functions[fn.name] = source + self.spec["functions"][fn.name] = source else: self._add_protocol_function(self_type_name, fn.name, source) @@ -156,13 +156,13 @@ def _add_protocol_function(self, protocol_name: str, function_name: str, functio Adds a function definition to the protocol functions dictionary. """ - if protocol_name not in self.protocols: - self.protocols[protocol_name] = ProtocolDefinition( + if protocol_name not in self.spec["protocols"]: + self.spec["protocols"][protocol_name] = ProtocolDefinition( functions={}) - self.protocols[protocol_name].functions[function_name] = function_def + self.spec["protocols"][protocol_name].functions[function_name] = function_def def _add_dataclass(self, source, cls: ast.ClassDef): - self.dataclasses[cls.name] = source + self.spec["dataclasses"][cls.name] = source def _process_code_class(self, source, cls: ast.ClassDef): class_name, parent_class = _get_class_info_from_ast(cls) @@ -173,7 +173,7 @@ def _process_code_class(self, source, cls: ast.ClassDef): if parent_class: assert parent_class == "Container" - self.ssz_objects[class_name] = source + self.spec["ssz_objects"][class_name] = source def _process_table(self, child: HTMLBlock): """ @@ -203,12 +203,12 @@ def _process_table(self, child: HTMLBlock): # It is a constant name and a generalized index if value.startswith("get_generalized_index"): - self.ssz_dep_constants[name] = value + self.spec["ssz_dep_constants"][name] = value continue # It is a constant and not a generalized index, and a function-dependent preset if description is not None and description.startswith(""): - self.func_dep_presets[name] = value + self.spec["func_dep_presets"][name] = value # It is a constant and not a generalized index value_def = _parse_value(name, value) @@ -217,24 +217,24 @@ def _process_table(self, child: HTMLBlock): if self.preset_name == "mainnet": check_yaml_matches_spec(name, self.preset, value_def) - self.preset_vars[name] = VariableDefinition(value_def.type_name, self.preset[name], value_def.comment, None) + self.spec["preset_vars"][name] = VariableDefinition(value_def.type_name, self.preset[name], value_def.comment, None) # It is a config variable elif name in self.config: if self.preset_name == "mainnet": check_yaml_matches_spec(name, self.config, value_def) - self.config_vars[name] = VariableDefinition(value_def.type_name, self.config[name], value_def.comment, None) + self.spec["config_vars"][name] = VariableDefinition(value_def.type_name, self.config[name], value_def.comment, None) # It is a constant variable or a preset_dep_constant_vars else: if name in ('ENDIANNESS', 'KZG_ENDIANNESS'): # Deal with mypy Literal typing check value_def = _parse_value(name, value, type_hint='Final') - if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars): - self.preset_dep_constant_vars[name] = value_def + if any(k in value for k in self.preset) or any(k in value for k in self.spec["preset_dep_constant_vars"]): + self.spec["preset_dep_constant_vars"][name] = value_def else: - self.constant_vars[name] = value_def + self.spec["constant_vars"][name] = value_def @staticmethod def _get_table_row_fields(row: Element) -> tuple[str, str, Optional[str]]: @@ -298,7 +298,7 @@ def _process_list_of_records_table(self, child, list_of_records_name): f"list of records mismatch: {list_of_records_spec} vs {list_of_records_config_file}" # Set the config variable - self.config_vars[list_of_records_name] = list_of_records_config_file + self.spec["config_vars"][list_of_records_name] = list_of_records_config_file @staticmethod def _make_list_of_records_type_map(list_of_records: list[dict[str, str]]) -> dict[str, str]: @@ -389,43 +389,43 @@ def _finalize_types(self): Calls helper functions to update KZG and CURDLEPROOFS setups if needed. """ # Update KZG trusted setup if needed - if any('KZG_SETUP' in name for name in self.constant_vars): + if any('KZG_SETUP' in name for name in self.spec["constant_vars"]): _update_constant_vars_with_kzg_setups( - self.constant_vars, self.preset_dep_constant_vars, self.preset_name + self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name ) # Update CURDLEPROOFS CRS if needed - if any('CURDLEPROOFS_CRS' in name for name in self.constant_vars): + if any('CURDLEPROOFS_CRS' in name for name in self.spec["constant_vars"]): _update_constant_vars_with_curdleproofs_crs( - self.constant_vars, self.preset_dep_constant_vars, self.preset_name + self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name ) # Split all_custom_types into custom_types and preset_dep_custom_types - self.custom_types = {} - self.preset_dep_custom_types = {} + self.spec["custom_types"] = {} + self.spec["preset_dep_custom_types"] = {} for name, value in self.all_custom_types.items(): - if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars): - self.preset_dep_custom_types[name] = value + if any(k in value for k in self.preset) or any(k in value for k in self.spec["preset_dep_constant_vars"]): + self.spec["preset_dep_custom_types"][name] = value else: - self.custom_types[name] = value + self.spec["custom_types"][name] = value def _build_spec_object(self): """ - Constructs and returns the SpecObject using all collected data. + Returns the SpecObject using all collected data. """ return SpecObject( - functions=self.functions, - protocols=self.protocols, - custom_types=self.custom_types, - preset_dep_custom_types=self.preset_dep_custom_types, - constant_vars=self.constant_vars, - preset_dep_constant_vars=self.preset_dep_constant_vars, - preset_vars=self.preset_vars, - config_vars=self.config_vars, - ssz_dep_constants=self.ssz_dep_constants, - func_dep_presets=self.func_dep_presets, - ssz_objects=self.ssz_objects, - dataclasses=self.dataclasses, + functions=self.spec["functions"], + protocols=self.spec["protocols"], + custom_types=self.spec["custom_types"], + preset_dep_custom_types=self.spec["preset_dep_custom_types"], + constant_vars=self.spec["constant_vars"], + preset_dep_constant_vars=self.spec["preset_dep_constant_vars"], + preset_vars=self.spec["preset_vars"], + config_vars=self.spec["config_vars"], + ssz_dep_constants=self.spec["ssz_dep_constants"], + func_dep_presets=self.spec["func_dep_presets"], + ssz_objects=self.spec["ssz_objects"], + dataclasses=self.spec["dataclasses"], ) @lru_cache(maxsize=None) From ac7a1dc7491483632aaafae7044b022e6f8e87b4 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Tue, 20 May 2025 11:44:11 +0000 Subject: [PATCH 04/18] Add tests --- Makefile | 2 +- pysetup/mk_to_spec.py | 39 ++-- tests/infra/test_mk_to_spec.py | 333 +++++++++++++++++++++++++++++++++ 3 files changed, 360 insertions(+), 14 deletions(-) create mode 100644 tests/infra/test_mk_to_spec.py diff --git a/Makefile b/Makefile index 25be7bd587..ed9a5270c1 100644 --- a/Makefile +++ b/Makefile @@ -117,7 +117,7 @@ test: pyspec $(PRESET) \ $(BLS) \ --junitxml=$(TEST_REPORT_DIR)/test_results.xml \ - $(PYSPEC_DIR)/eth2spec + $(CURDIR)/tests/infra $(PYSPEC_DIR)/eth2spec ############################################################################### # Coverage diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py index feb914f5da..fea0b90a2f 100644 --- a/pysetup/mk_to_spec.py +++ b/pysetup/mk_to_spec.py @@ -10,7 +10,7 @@ from marko.block import BlankLine, Heading, FencedCode, HTMLBlock, Document from marko.element import Element from marko.ext.gfm import gfm -from marko.ext.gfm.elements import Table +from marko.ext.gfm.elements import Table, TableRow, TableCell from marko.inline import CodeSpan from .typing import ProtocolDefinition, VariableDefinition, SpecObject @@ -86,7 +86,7 @@ def _parse_document(self, file_name: Path) -> Iterator[Element]: document = parse_markdown(source_file.read()) return iter(document.children) - def _process_child(self, child: Element): + def _process_child(self, child: Element) -> None: # Skip blank lines if isinstance(child, BlankLine): return @@ -102,13 +102,13 @@ def _process_child(self, child: Element): case HTMLBlock(): self._process_html_block(child) - def _process_heading(self, heading: Heading): + def _process_heading(self, heading: Heading) -> None: """ Extracts the section name from the heading and updates current_heading_name for context. """ self.current_heading_name = _get_name_from_heading(heading) - def _process_code_block(self, code_block: FencedCode): + def _process_code_block(self, code_block: FencedCode) -> None: """ Processes a FencedCode block: - Checks if the code block is Python. @@ -134,7 +134,7 @@ def _process_code_block(self, code_block: FencedCode): else: raise Exception("unrecognized python code element: " + source) - def _process_code_def(self, source: str, fn: ast.FunctionDef): + def _process_code_def(self, source: str, fn: ast.FunctionDef) -> None: """ Processes a function definition node from the AST and stores its source code representation. If the function is a method (i.e., has a self type), it is added to the protocol functions for that type. @@ -151,7 +151,7 @@ def _process_code_def(self, source: str, fn: ast.FunctionDef): else: self._add_protocol_function(self_type_name, fn.name, source) - def _add_protocol_function(self, protocol_name: str, function_name: str, function_def: str): + def _add_protocol_function(self, protocol_name: str, function_name: str, function_def: str) -> None: """ Adds a function definition to the protocol functions dictionary. """ @@ -161,10 +161,23 @@ def _add_protocol_function(self, protocol_name: str, function_name: str, functio functions={}) self.spec["protocols"][protocol_name].functions[function_name] = function_def - def _add_dataclass(self, source, cls: ast.ClassDef): + def _add_dataclass(self, source, cls: ast.ClassDef) -> None: self.spec["dataclasses"][cls.name] = source - def _process_code_class(self, source, cls: ast.ClassDef): + def _process_code_class(self, source, cls: ast.ClassDef) -> None: + """ + Processes an AST class definition node, validates its consistency with the current heading, + and updates the spec dictionary with the class source code. + Args: + source (str): The source code of the class. + cls (ast.ClassDef): The AST node representing the class definition. + Raises: + Exception: If the class name does not match the current heading name. + AssertionError: If the parent class is not 'Container' when a parent class is present. + Side Effects: + Updates self.spec["ssz_objects"] with the class source code, keyed by class name. + """ + class_name, parent_class = _get_class_info_from_ast(cls) # check consistency with spec @@ -175,7 +188,7 @@ def _process_code_class(self, source, cls: ast.ClassDef): assert parent_class == "Container" self.spec["ssz_objects"][class_name] = source - def _process_table(self, child: HTMLBlock): + def _process_table(self, table: Table): """ Handles standard tables (not list-of-records). Iterates over rows, extracting variable names, values, and descriptions. @@ -184,7 +197,7 @@ def _process_table(self, child: HTMLBlock): Handles special cases for predefined types and function-dependent presets. """ - for row in child.children: + for row in table.children: if len(row.children) < 2: continue @@ -210,7 +223,7 @@ def _process_table(self, child: HTMLBlock): if description is not None and description.startswith(""): self.spec["func_dep_presets"][name] = value - # It is a constant and not a generalized index + # It is a constant and not a generalized index, and not a function-dependent preset value_def = _parse_value(name, value) # It is a preset if name in self.preset: @@ -237,7 +250,7 @@ def _process_table(self, child: HTMLBlock): self.spec["constant_vars"][name] = value_def @staticmethod - def _get_table_row_fields(row: Element) -> tuple[str, str, Optional[str]]: + def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]: """ Extracts the name, value, and description fields from a table row element. Description can be None. @@ -599,7 +612,7 @@ def check_yaml_matches_spec(var_name, yaml, value_def): # Okay it's probably something more serious, let's ignore pass -def _has_decorator(decorateable: ast.expr, name: str) -> bool: +def _has_decorator(decorateable: ast.ClassDef | ast.FunctionDef, name: str) -> bool: return any(_is_decorator(d, name) for d in decorateable.decorator_list) def _is_decorator(decorator: ast.expr, name: str) -> bool: diff --git a/tests/infra/test_mk_to_spec.py b/tests/infra/test_mk_to_spec.py new file mode 100644 index 0000000000..829c8ff155 --- /dev/null +++ b/tests/infra/test_mk_to_spec.py @@ -0,0 +1,333 @@ +import pytest +from pathlib import Path +from pysetup.mk_to_spec import MarkdownToSpec + + +@pytest.fixture +def dummy_preset(): + return {"EXAMPLE": "1"} + + +@pytest.fixture +def dummy_config(): + return {"CONFIG": "2"} + + +@pytest.fixture +def dummy_file(tmp_path): + file = tmp_path / "dummy.md" + file.write_text("# Dummy\n") + return file + + +def test_constructor_initializes_fields(dummy_file, dummy_preset, dummy_config): + preset_name = "mainnet" + m2s = MarkdownToSpec( + file_name=Path(dummy_file), + preset=dummy_preset, + config=dummy_config, + preset_name=preset_name, + ) + assert m2s.preset == dummy_preset + assert m2s.config == dummy_config + assert m2s.preset_name == preset_name + assert isinstance(m2s.spec, dict) + assert isinstance(m2s.all_custom_types, dict) + assert hasattr(m2s, "document_iterator") + assert m2s.current_heading_name is None + + +def test_run_returns_spec_object(dummy_file, dummy_preset, dummy_config): + preset_name = "mainnet" + m2s = MarkdownToSpec( + file_name=Path(dummy_file), + preset=dummy_preset, + config=dummy_config, + preset_name=preset_name, + ) + spec_obj = m2s.run() + # Check that the result is of the expected type + from pysetup.typing import SpecObject + + assert isinstance(spec_obj, SpecObject) + + +def test_run_includes_table_in_specobject(tmp_path, dummy_preset, dummy_config): + # Create a markdown file with a simple markdown table + md_content = """ +# Example + +| Name | Value | Description | +|---------|--------------|------------------| +| CONST_A | uint64(42) | Example constant | +| CONST_B | Bytes32(0x01)| Another constant | +""" + file = tmp_path / "table.md" + file.write_text(md_content) + m2s = MarkdownToSpec( + file_name=Path(file), + preset=dummy_preset, + config=dummy_config, + preset_name="mainnet", + ) + spec_obj = m2s.run() + # The constant should be present in the SpecObject's constant_vars + assert "CONST_A" in spec_obj.constant_vars + assert spec_obj.constant_vars["CONST_A"].type_name == "uint64" + assert spec_obj.constant_vars["CONST_A"].value == "42" + assert "CONST_B" in spec_obj.constant_vars + assert spec_obj.constant_vars["CONST_B"].type_name == "Bytes32" + assert spec_obj.constant_vars["CONST_B"].value == "0x01" + + +def test_run_includes_list_of_records_table(tmp_path, dummy_preset, dummy_config): + md_content = """ + + +| Epoch | Max Blobs Per Block | Description | +| --------------------------- | ------------------- | -------------------------------- | +| `Epoch(269568)` **Deneb** | `uint64(6)` | The limit is set to `6` blobs | +| `Epoch(364032)` **Electra** | `uint64(9)` | The limit is raised to `9` blobs | +""" + file = tmp_path / "list_of_records.md" + file.write_text(md_content) + # The config must have a 'BLOB_SCHEDULE' key with the expected structure for mainnet + config = dummy_config.copy() + config["BLOB_SCHEDULE"] = [ + {"EPOCH": "269568", "MAX_BLOBS_PER_BLOCK": "6"}, + {"EPOCH": "364032", "MAX_BLOBS_PER_BLOCK": "9"}, + ] + m2s = MarkdownToSpec( + file_name=Path(file), + preset=dummy_preset, + config=config, + preset_name="mainnet", + ) + spec_obj = m2s.run() + # The result should have 'BLOB_SCHEDULE' in config_vars + assert "BLOB_SCHEDULE" in spec_obj.config_vars + # The value should be a list of dicts with type constructors applied + assert isinstance(spec_obj.config_vars["BLOB_SCHEDULE"], list) + assert spec_obj.config_vars["BLOB_SCHEDULE"][0]["EPOCH"] == "Epoch(269568)" + assert spec_obj.config_vars["BLOB_SCHEDULE"][0]["MAX_BLOBS_PER_BLOCK"] == "uint64(6)" + assert spec_obj.config_vars["BLOB_SCHEDULE"][1]["EPOCH"] == "Epoch(364032)" + assert spec_obj.config_vars["BLOB_SCHEDULE"][1]["MAX_BLOBS_PER_BLOCK"] == "uint64(9)" + + +def test_run_includes_list_of_records_table_minimal(tmp_path, dummy_preset, dummy_config): + md_content = """ + + +| Epoch | Max Blobs Per Block | Description | +| --------------------------- | ------------------- | -------------------------------- | +| `Epoch(269568)` **Deneb** | `uint64(6)` | The limit is set to `6` blobs | +| `Epoch(364032)` **Electra** | `uint64(9)` | The limit is raised to `9` blobs | +""" + file = tmp_path / "list_of_records_minimal.md" + file.write_text(md_content) + config = dummy_config.copy() + # Use different values than the table for minimal preset + config["BLOB_SCHEDULE"] = [ + {"EPOCH": "2", "MAX_BLOBS_PER_BLOCK": "3"}, + {"EPOCH": "4", "MAX_BLOBS_PER_BLOCK": "5"}, + ] + m2s = MarkdownToSpec( + file_name=Path(file), + preset=dummy_preset, + config=config, + preset_name="minimal", + ) + spec_obj = m2s.run() + assert "BLOB_SCHEDULE" in spec_obj.config_vars + assert isinstance(spec_obj.config_vars["BLOB_SCHEDULE"], list) + # The result should follow the config, not the table + assert spec_obj.config_vars["BLOB_SCHEDULE"][0]["EPOCH"] == "Epoch(2)" + assert spec_obj.config_vars["BLOB_SCHEDULE"][0]["MAX_BLOBS_PER_BLOCK"] == "uint64(3)" + assert spec_obj.config_vars["BLOB_SCHEDULE"][1]["EPOCH"] == "Epoch(4)" + assert spec_obj.config_vars["BLOB_SCHEDULE"][1]["MAX_BLOBS_PER_BLOCK"] == "uint64(5)" + + +def test_run_includes_python_function(tmp_path, dummy_preset, dummy_config): + md_content = """ +#### `compute_epoch_at_slot` + +```python +def compute_epoch_at_slot(slot: Slot) -> Epoch: + \"\"\" + Return the epoch number at slot. + \"\"\" + return Epoch(slot // SLOTS_PER_EPOCH) +``` +""" + file = tmp_path / "function.md" + file.write_text(md_content) + m2s = MarkdownToSpec( + file_name=Path(file), + preset=dummy_preset, + config=dummy_config, + preset_name="mainnet", + ) + spec_obj = m2s.run() + # The function should be present in the SpecObject's functions + assert "compute_epoch_at_slot" in spec_obj.functions + func_src = spec_obj.functions["compute_epoch_at_slot"] + assert "def compute_epoch_at_slot(slot: Slot) -> Epoch" in func_src + assert "return Epoch(slot // SLOTS_PER_EPOCH)" in func_src + + +def test_run_includes_python_class_container(tmp_path, dummy_preset, dummy_config): + md_content = """ +#### `Checkpoint` + +```python +class Checkpoint(Container): + epoch: Epoch + root: Root +``` +""" + file = tmp_path / "class_container.md" + file.write_text(md_content) + m2s = MarkdownToSpec( + file_name=Path(file), + preset=dummy_preset, + config=dummy_config, + preset_name="mainnet", + ) + spec_obj = m2s.run() + # The class should be present in the SpecObject's ssz_objects + assert "Checkpoint" in spec_obj.ssz_objects + class_src = spec_obj.ssz_objects["Checkpoint"] + assert "class Checkpoint(Container):" in class_src + assert "epoch: Epoch" in class_src + assert "root: Root" in class_src + + +def test_run_includes_python_dataclass(tmp_path, dummy_preset, dummy_config): + md_content = """ +## Helpers + +### `PayloadAttributes` + +Used to signal to initiate the payload build process via `notify_forkchoice_updated`. + +```python +@dataclass +class PayloadAttributes(object): + timestamp: uint64 + prev_randao: Bytes32 + suggested_fee_recipient: ExecutionAddress +``` +""" + file = tmp_path / "dataclass.md" + file.write_text(md_content) + m2s = MarkdownToSpec( + file_name=Path(file), + preset=dummy_preset, + config=dummy_config, + preset_name="mainnet", + ) + spec_obj = m2s.run() + # The dataclass should be present in the SpecObject's dataclasses + assert "PayloadAttributes" in spec_obj.dataclasses + class_src = spec_obj.dataclasses["PayloadAttributes"] + assert "@dataclass" in class_src + assert "class PayloadAttributes(object):" in class_src + assert "timestamp: uint64" in class_src + assert "prev_randao: Bytes32" in class_src + assert "suggested_fee_recipient: ExecutionAddress" in class_src + + +def test_run_skips_predefined_type_rows(tmp_path, dummy_preset, dummy_config): + md_content = """ +## Cryptographic types + +| Name | SSZ equivalent | Description | +| ------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------- | ------------------------------------------------------------ | +| [`PolynomialCoeff`](https://github.com/ethereum/consensus-specs/blob/36a5719b78523c057065515c8f8fcaeba75d065b/pysetup/spec_builders/eip7594.py#L20-L24) | `List[BLSFieldElement, FIELD_ELEMENTS_PER_EXT_BLOB]` | A polynomial in coefficient form | +| [`Coset`](https://github.com/ethereum/consensus-specs/blob/36a5719b78523c057065515c8f8fcaeba75d065b/pysetup/spec_builders/eip7594.py#L27-L33) | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]` | The evaluation domain of a cell | +| [`CosetEvals`](https://github.com/ethereum/consensus-specs/blob/36a5719b78523c057065515c8f8fcaeba75d065b/pysetup/spec_builders/eip7594.py#L36-L42) | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]` | A cell's evaluations over its coset | +""" + file = tmp_path / "predefined_types.md" + file.write_text(md_content) + m2s = MarkdownToSpec( + file_name=Path(file), + preset=dummy_preset, + config=dummy_config, + preset_name="mainnet", + ) + spec_obj = m2s.run() + # These should not be in custom_types or constant_vars due to + assert "PolynomialCoeff" not in spec_obj.custom_types + assert "Coset" not in spec_obj.custom_types + assert "CosetEvals" not in spec_obj.custom_types + assert "PolynomialCoeff" not in spec_obj.constant_vars + assert "Coset" not in spec_obj.constant_vars + assert "CosetEvals" not in spec_obj.constant_vars + + +def test_run_skips_eth2spec_skip_code_block(tmp_path, dummy_preset, dummy_config): + md_content = """ +## Helpers + +### `PayloadAttributes` + +Used to signal to initiate the payload build process via `notify_forkchoice_updated`. + + +```python +@dataclass +class PayloadAttributes(object): + timestamp: uint64 + prev_randao: Bytes32 + suggested_fee_recipient: ExecutionAddress +``` +""" + file = tmp_path / "dataclass_skip.md" + file.write_text(md_content) + m2s = MarkdownToSpec( + file_name=Path(file), + preset=dummy_preset, + config=dummy_config, + preset_name="mainnet", + ) + spec_obj = m2s.run() + # The dataclass should NOT be present in the SpecObject's dataclasses + assert "PayloadAttributes" not in spec_obj.dataclasses + + +def test_finalize_types_called_and_updates_custom_types( + tmp_path, dummy_preset, dummy_config, monkeypatch +): + # Minimal markdown with a type definition + md_content = """ +# Types + +| Name | SSZ equivalent | Description | +| ---------------- | -------------- | --------------------------------- | +| `Slot` | `uint64` | a slot number | +| `Epoch` | `uint64` | an epoch number | +""" + file = tmp_path / "types.md" + file.write_text(md_content) + m2s = MarkdownToSpec( + file_name=Path(file), + preset=dummy_preset, + config=dummy_config, + preset_name="mainnet", + ) + + # Spy on _finalize_types + called = {} + orig_finalize_types = m2s._finalize_types + + def spy_finalize_types(): + called["ran"] = True + return orig_finalize_types() + + monkeypatch.setattr(m2s, "_finalize_types", spy_finalize_types) + + spec_obj = m2s.run() + assert called.get("ran") is True + # After _finalize_types, custom_types should include 'Slot' and 'Epoch' + assert spec_obj.custom_types["Slot"] == "uint64" + assert spec_obj.custom_types["Epoch"] == "uint64" From 42c9d46e7a827d46863a0707438827171b80d198 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Tue, 20 May 2025 11:53:49 +0000 Subject: [PATCH 05/18] Remove whitespaces --- Makefile | 2 +- pysetup/mk_to_spec.py | 6 ++---- tests/infra/test_mk_to_spec.py | 4 +++- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index b53fdaf657..70291413b4 100644 --- a/Makefile +++ b/Makefile @@ -248,4 +248,4 @@ kzg_setups: pyspec # Delete all untracked files. clean: - rm -fR venv .mypy_cache build eth2spec.egg-info pysetup/__pycache__ pysetup/spec_builders/__pycache__ + rm -fR venv .mypy_cache build eth2spec.egg-info pysetup/__pycache__ pysetup/spec_builders/__pycache__ diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py index fea0b90a2f..7ce65cb54f 100644 --- a/pysetup/mk_to_spec.py +++ b/pysetup/mk_to_spec.py @@ -75,8 +75,6 @@ def _skip_element(self) -> None: """ self._get_next_element() - - def _parse_document(self, file_name: Path) -> Iterator[Element]: """ Opens the markdown file, parses its content into a document object using _parse_markdown, @@ -123,7 +121,7 @@ def _process_code_block(self, code_block: FencedCode) -> None: module = ast.parse(source) # AST container of the first definition in the block - first_def = module.body[0] + first_def = module.body[0] if isinstance(first_def, ast.FunctionDef): self._process_code_def(clean_source, first_def) @@ -145,7 +143,7 @@ def _process_code_def(self, source: str, fn: ast.FunctionDef) -> None: """ self_type_name = _get_self_type_from_source(fn) - + if self_type_name is None: self.spec["functions"][fn.name] = source else: diff --git a/tests/infra/test_mk_to_spec.py b/tests/infra/test_mk_to_spec.py index 829c8ff155..a75f5dd2ab 100644 --- a/tests/infra/test_mk_to_spec.py +++ b/tests/infra/test_mk_to_spec.py @@ -1,5 +1,7 @@ -import pytest from pathlib import Path + +import pytest + from pysetup.mk_to_spec import MarkdownToSpec From d4116246f6cce7edfbddfe3ad7be0a2c6df02407 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Tue, 20 May 2025 15:05:38 +0000 Subject: [PATCH 06/18] Fix a few more type hints --- pysetup/mk_to_spec.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py index 7ce65cb54f..128828439a 100644 --- a/pysetup/mk_to_spec.py +++ b/pysetup/mk_to_spec.py @@ -2,7 +2,7 @@ import json from pathlib import Path import string -from typing import Dict, Optional, Tuple, Iterator +from typing import Dict, Optional, Tuple, Iterator, cast import re from functools import lru_cache @@ -16,13 +16,13 @@ from .typing import ProtocolDefinition, VariableDefinition, SpecObject class MarkdownToSpec: - def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name: str): + def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str | Dict[str, str]], preset_name: str): self.preset = preset self.config = config self.preset_name = preset_name # Use a single dict to hold all SpecObject fields - self.spec = { + self.spec: dict[str, dict] = { "functions": {}, "protocols": {}, "custom_types": {}, @@ -159,10 +159,10 @@ def _add_protocol_function(self, protocol_name: str, function_name: str, functio functions={}) self.spec["protocols"][protocol_name].functions[function_name] = function_def - def _add_dataclass(self, source, cls: ast.ClassDef) -> None: + def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None: self.spec["dataclasses"][cls.name] = source - def _process_code_class(self, source, cls: ast.ClassDef) -> None: + def _process_code_class(self, source: str, cls: ast.ClassDef) -> None: """ Processes an AST class definition node, validates its consistency with the current heading, and updates the spec dictionary with the class source code. @@ -186,7 +186,7 @@ def _process_code_class(self, source, cls: ast.ClassDef) -> None: assert parent_class == "Container" self.spec["ssz_objects"][class_name] = source - def _process_table(self, table: Table): + def _process_table(self, table: Table) -> None: """ Handles standard tables (not list-of-records). Iterates over rows, extracting variable names, values, and descriptions. @@ -195,7 +195,7 @@ def _process_table(self, table: Table): Handles special cases for predefined types and function-dependent presets. """ - for row in table.children: + for row in cast(list[TableRow], table.children): if len(row.children) < 2: continue @@ -253,7 +253,7 @@ def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]: Extracts the name, value, and description fields from a table row element. Description can be None. """ - cells = row.children + cells = cast(list[TableCell], row.children) name_cell = cells[0] name = name_cell.children[0].children @@ -275,7 +275,7 @@ def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]: return name, value, description - def _process_list_of_records_table(self, child, list_of_records_name): + def _process_list_of_records_table(self, table: Table, list_of_records_name: str) -> None: """ Handles tables marked as 'list-of-records'. Extracts headers and rows, mapping field names and types. @@ -292,7 +292,7 @@ def _process_list_of_records_table(self, child, list_of_records_name): The method _process_html_block calls this method when it encounters a comment of the form ``. """ - list_of_records_spec = self._extract_list_of_records_spec(child) + list_of_records_spec = self._extract_list_of_records_spec(table) # Make a type map from the spec definition type_map = self._make_list_of_records_type_map(list_of_records_spec) @@ -327,14 +327,14 @@ def _make_list_of_records_type_map(list_of_records: list[dict[str, str]]) -> dic return type_map @staticmethod - def _extract_list_of_records_spec(child) -> list[dict[str, str]]: + def _extract_list_of_records_spec(table: Table) -> list[dict[str, str]]: """ Extracts the list of records from a table element. Returns a list of dicts, each representing a row with field names as keys. """ # Save the table header, used for field names (skip last item: description) - header_row = child.children[0] + header_row = cast(TableRow, table.children[0]) list_of_records_spec_header = [ re.sub(r'\s+', '_', value.children[0].children.upper()) for value in header_row.children[:-1] @@ -346,7 +346,7 @@ def _extract_list_of_records_spec(child) -> list[dict[str, str]]: list_of_records_spec_header[j]: value.children[0].children for j, value in enumerate(row.children[:-1]) } - for row in child.children[1:] + for row in table.children[1:] ] return list_of_records_spec @@ -370,13 +370,13 @@ def _extract_typed_records_config( list_of_records_config_file.append(new_entry) return list_of_records_config_file - def _process_html_block(self, child): + def _process_html_block(self, html: HTMLBlock) -> None: """ Handles HTML comments for skip logic and list-of-records detection. Sets flags or state variables for the next iteration. """ - body = child.body.strip() + body = html.body.strip() # This comment marks that we should skip the next element if body == "": @@ -394,7 +394,7 @@ def _process_html_block(self, child): f"expected table after list-of-records comment, got {type(table_element)}") self._process_list_of_records_table(table_element, match.group(1).upper()) - def _finalize_types(self): + def _finalize_types(self) -> None: """ Processes all_custom_types into custom_types and preset_dep_custom_types. Calls helper functions to update KZG and CURDLEPROOFS setups if needed. @@ -420,7 +420,7 @@ def _finalize_types(self): else: self.spec["custom_types"][name] = value - def _build_spec_object(self): + def _build_spec_object(self) -> SpecObject: """ Returns the SpecObject using all collected data. """ @@ -505,7 +505,7 @@ def _is_constant_id(name: str) -> bool: return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:])) @lru_cache(maxsize=None) -def _load_kzg_trusted_setups(preset_name): +def _load_kzg_trusted_setups(preset_name: str) -> Tuple[list[str], list[str], list[str]]: trusted_setups_file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/trusted_setup_4096.json' with open(trusted_setups_file_path, 'r') as f: @@ -517,7 +517,7 @@ def _load_kzg_trusted_setups(preset_name): return trusted_setup_G1_monomial, trusted_setup_G1_lagrange, trusted_setup_G2_monomial @lru_cache(maxsize=None) -def _load_curdleproofs_crs(preset_name): +def _load_curdleproofs_crs(preset_name: str) -> Dict[str, list[str]]: """ NOTE: File generated from https://github.com/asn-d6/curdleproofs/blob/8e8bf6d4191fb6a844002f75666fb7009716319b/tests/crs.rs#L53-L67 """ @@ -588,7 +588,7 @@ def parse_markdown(content: str) -> Document: return gfm.parse(content) -def check_yaml_matches_spec(var_name, yaml, value_def): +def check_yaml_matches_spec(var_name: str, yaml: Dict[str, str], value_def: VariableDefinition) -> None: """ This function performs a sanity check for presets & configs. To a certain degree, it ensures that the values in the specifications match those in the yaml files. From bdfeaaf866fd5929f4943337a218c3d8743bf8f2 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Wed, 21 May 2025 16:50:00 +0000 Subject: [PATCH 07/18] Split in two lines --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 70291413b4..090283ddd5 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,8 @@ test: pyspec $(PRESET) \ $(BLS) \ --junitxml=$(TEST_REPORT_DIR)/test_results.xml \ - $(CURDIR)/tests/infra $(PYSPEC_DIR)/eth2spec + $(CURDIR)/tests/infra \ + $(PYSPEC_DIR)/eth2spec ############################################################################### # Coverage From af4b2b48a087612f7c000bb001c70940faefe3a7 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Wed, 21 May 2025 16:51:28 +0000 Subject: [PATCH 08/18] mk -> md --- pysetup/{mk_to_spec.py => md_to_spec.py} | 0 tests/infra/{test_mk_to_spec.py => test_md_to_spec.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename pysetup/{mk_to_spec.py => md_to_spec.py} (100%) rename tests/infra/{test_mk_to_spec.py => test_md_to_spec.py} (100%) diff --git a/pysetup/mk_to_spec.py b/pysetup/md_to_spec.py similarity index 100% rename from pysetup/mk_to_spec.py rename to pysetup/md_to_spec.py diff --git a/tests/infra/test_mk_to_spec.py b/tests/infra/test_md_to_spec.py similarity index 100% rename from tests/infra/test_mk_to_spec.py rename to tests/infra/test_md_to_spec.py From 56c55d3206cfa60028671635e31d178f03dd501c Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Wed, 21 May 2025 16:59:12 +0000 Subject: [PATCH 09/18] Fix imports --- setup.py | 2 +- tests/infra/test_md_to_spec.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 221d20f8bc..1c8e12b5bd 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ SpecObject, VariableDefinition, ) -from pysetup.mk_to_spec import MarkdownToSpec +from pysetup.md_to_spec import MarkdownToSpec # Ignore '1.5.0-alpha.*' to '1.5.0a*' messages. warnings.filterwarnings('ignore', message='Normalizing .* to .*') diff --git a/tests/infra/test_md_to_spec.py b/tests/infra/test_md_to_spec.py index a75f5dd2ab..b8693309ed 100644 --- a/tests/infra/test_md_to_spec.py +++ b/tests/infra/test_md_to_spec.py @@ -2,7 +2,7 @@ import pytest -from pysetup.mk_to_spec import MarkdownToSpec +from pysetup.md_to_spec import MarkdownToSpec @pytest.fixture From 23958cfc26bafc5689b98e4e4da675421d6d9563 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Wed, 21 May 2025 17:05:44 +0000 Subject: [PATCH 10/18] Linting --- pysetup/md_to_spec.py | 211 +++++++++++++++++++++++++----------------- 1 file changed, 127 insertions(+), 84 deletions(-) diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py index 128828439a..d5afe7e263 100644 --- a/pysetup/md_to_spec.py +++ b/pysetup/md_to_spec.py @@ -1,40 +1,46 @@ import ast import json -from pathlib import Path -import string -from typing import Dict, Optional, Tuple, Iterator, cast import re +import string from functools import lru_cache +from pathlib import Path +from typing import cast, Dict, Iterator, Optional, Tuple - -from marko.block import BlankLine, Heading, FencedCode, HTMLBlock, Document +from marko.block import BlankLine, Document, FencedCode, Heading, HTMLBlock from marko.element import Element from marko.ext.gfm import gfm -from marko.ext.gfm.elements import Table, TableRow, TableCell +from marko.ext.gfm.elements import Table, TableCell, TableRow from marko.inline import CodeSpan -from .typing import ProtocolDefinition, VariableDefinition, SpecObject +from .typing import ProtocolDefinition, SpecObject, VariableDefinition + class MarkdownToSpec: - def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str | Dict[str, str]], preset_name: str): + def __init__( + self, + file_name: Path, + preset: Dict[str, str], + config: Dict[str, str | Dict[str, str]], + preset_name: str, + ): self.preset = preset self.config = config self.preset_name = preset_name # Use a single dict to hold all SpecObject fields self.spec: dict[str, dict] = { - "functions": {}, - "protocols": {}, - "custom_types": {}, - "preset_dep_custom_types": {}, + "config_vars": {}, "constant_vars": {}, + "custom_types": {}, + "dataclasses": {}, + "func_dep_presets": {}, + "functions": {}, "preset_dep_constant_vars": {}, + "preset_dep_custom_types": {}, "preset_vars": {}, - "config_vars": {}, + "protocols": {}, "ssz_dep_constants": {}, - "func_dep_presets": {}, "ssz_objects": {}, - "dataclasses": {}, } self.all_custom_types: Dict[str, str] = {} @@ -61,7 +67,6 @@ def _get_next_element(self) -> Optional[Element]: Returns the next element in the document. If the end of the document is reached, returns None. """ - try: while isinstance(result := next(self.document_iterator), BlankLine): pass @@ -149,14 +154,15 @@ def _process_code_def(self, source: str, fn: ast.FunctionDef) -> None: else: self._add_protocol_function(self_type_name, fn.name, source) - def _add_protocol_function(self, protocol_name: str, function_name: str, function_def: str) -> None: + def _add_protocol_function( + self, protocol_name: str, function_name: str, function_def: str + ) -> None: """ Adds a function definition to the protocol functions dictionary. """ if protocol_name not in self.spec["protocols"]: - self.spec["protocols"][protocol_name] = ProtocolDefinition( - functions={}) + self.spec["protocols"][protocol_name] = ProtocolDefinition(functions={}) self.spec["protocols"][protocol_name].functions[function_name] = function_def def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None: @@ -208,7 +214,9 @@ def _process_table(self, table: Table) -> None: # If it is not a constant, check if it is a custom type if not _is_constant_id(name): # Check for short type declarations - if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")): + if value.startswith( + ("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector") + ): self.all_custom_types[name] = value continue @@ -228,21 +236,27 @@ def _process_table(self, table: Table) -> None: if self.preset_name == "mainnet": check_yaml_matches_spec(name, self.preset, value_def) - self.spec["preset_vars"][name] = VariableDefinition(value_def.type_name, self.preset[name], value_def.comment, None) + self.spec["preset_vars"][name] = VariableDefinition( + value_def.type_name, self.preset[name], value_def.comment, None + ) # It is a config variable elif name in self.config: if self.preset_name == "mainnet": check_yaml_matches_spec(name, self.config, value_def) - self.spec["config_vars"][name] = VariableDefinition(value_def.type_name, self.config[name], value_def.comment, None) + self.spec["config_vars"][name] = VariableDefinition( + value_def.type_name, self.config[name], value_def.comment, None + ) # It is a constant variable or a preset_dep_constant_vars else: - if name in ('ENDIANNESS', 'KZG_ENDIANNESS'): + if name in ("ENDIANNESS", "KZG_ENDIANNESS"): # Deal with mypy Literal typing check - value_def = _parse_value(name, value, type_hint='Final') - if any(k in value for k in self.preset) or any(k in value for k in self.spec["preset_dep_constant_vars"]): + value_def = _parse_value(name, value, type_hint="Final") + if any(k in value for k in self.preset) or any( + k in value for k in self.spec["preset_dep_constant_vars"] + ): self.spec["preset_dep_constant_vars"][name] = value_def else: self.spec["constant_vars"][name] = value_def @@ -305,8 +319,9 @@ def _process_list_of_records_table(self, table: Table, list_of_records_name: str # For mainnet, check that the spec config & file config are the same # For minimal, we expect this to be different; just use the file config if self.preset_name == "mainnet": - assert list_of_records_spec == list_of_records_config_file, \ - f"list of records mismatch: {list_of_records_spec} vs {list_of_records_config_file}" + assert ( + list_of_records_spec == list_of_records_config_file + ), f"list of records mismatch: {list_of_records_spec} vs {list_of_records_config_file}" # Set the config variable self.spec["config_vars"][list_of_records_name] = list_of_records_config_file @@ -318,7 +333,7 @@ def _make_list_of_records_type_map(list_of_records: list[dict[str, str]]) -> dic from field name to type name, based on values of the form 'TypeName(...)'. """ type_map: dict[str, str] = {} - pattern = re.compile(r'^(\w+)\(.*\)$') + pattern = re.compile(r"^(\w+)\(.*\)$") for entry in list_of_records: for k, v in entry.items(): m = pattern.match(v) @@ -336,7 +351,7 @@ def _extract_list_of_records_spec(table: Table) -> list[dict[str, str]]: # Save the table header, used for field names (skip last item: description) header_row = cast(TableRow, table.children[0]) list_of_records_spec_header = [ - re.sub(r'\s+', '_', value.children[0].children.upper()) + re.sub(r"\s+", "_", value.children[0].children.upper()) for value in header_row.children[:-1] ] @@ -385,13 +400,13 @@ def _process_html_block(self, html: HTMLBlock) -> None: # Handle list-of-records tables # This comment marks that the next table is a list-of-records # e.g. - match = re.match( - r"", body) + match = re.match(r"", body) if match: table_element = self._get_next_element() if not isinstance(table_element, Table): raise Exception( - f"expected table after list-of-records comment, got {type(table_element)}") + f"expected table after list-of-records comment, got {type(table_element)}" + ) self._process_list_of_records_table(table_element, match.group(1).upper()) def _finalize_types(self) -> None: @@ -400,13 +415,13 @@ def _finalize_types(self) -> None: Calls helper functions to update KZG and CURDLEPROOFS setups if needed. """ # Update KZG trusted setup if needed - if any('KZG_SETUP' in name for name in self.spec["constant_vars"]): + if any("KZG_SETUP" in name for name in self.spec["constant_vars"]): _update_constant_vars_with_kzg_setups( self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name ) # Update CURDLEPROOFS CRS if needed - if any('CURDLEPROOFS_CRS' in name for name in self.spec["constant_vars"]): + if any("CURDLEPROOFS_CRS" in name for name in self.spec["constant_vars"]): _update_constant_vars_with_curdleproofs_crs( self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name ) @@ -415,7 +430,9 @@ def _finalize_types(self) -> None: self.spec["custom_types"] = {} self.spec["preset_dep_custom_types"] = {} for name, value in self.all_custom_types.items(): - if any(k in value for k in self.preset) or any(k in value for k in self.spec["preset_dep_constant_vars"]): + if any(k in value for k in self.preset) or any( + k in value for k in self.spec["preset_dep_constant_vars"] + ): self.spec["preset_dep_custom_types"][name] = value else: self.spec["custom_types"][name] = value @@ -425,20 +442,21 @@ def _build_spec_object(self) -> SpecObject: Returns the SpecObject using all collected data. """ return SpecObject( - functions=self.spec["functions"], - protocols=self.spec["protocols"], - custom_types=self.spec["custom_types"], - preset_dep_custom_types=self.spec["preset_dep_custom_types"], + config_vars=self.spec["config_vars"], constant_vars=self.spec["constant_vars"], + custom_types=self.spec["custom_types"], + dataclasses=self.spec["dataclasses"], + func_dep_presets=self.spec["func_dep_presets"], + functions=self.spec["functions"], preset_dep_constant_vars=self.spec["preset_dep_constant_vars"], + preset_dep_custom_types=self.spec["preset_dep_custom_types"], preset_vars=self.spec["preset_vars"], - config_vars=self.spec["config_vars"], + protocols=self.spec["protocols"], ssz_dep_constants=self.spec["ssz_dep_constants"], - func_dep_presets=self.spec["func_dep_presets"], ssz_objects=self.spec["ssz_objects"], - dataclasses=self.spec["dataclasses"], ) + @lru_cache(maxsize=None) def _get_name_from_heading(heading: Heading) -> Optional[str]: last_child = heading.children[-1] @@ -465,7 +483,7 @@ def _get_self_type_from_source(fn: ast.FunctionDef) -> Optional[str]: args = fn.args.args if len(args) == 0: return None - if args[0].arg != 'self': + if args[0].arg != "self": return None if args[0].annotation is None: return None @@ -500,86 +518,104 @@ def _is_constant_id(name: str) -> bool: bool: True if the name is a valid constant identifier, False otherwise. """ - if name[0] not in string.ascii_uppercase + '_': + if name[0] not in string.ascii_uppercase + "_": return False - return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:])) + return all(map(lambda c: c in string.ascii_uppercase + "_" + string.digits, name[1:])) + @lru_cache(maxsize=None) def _load_kzg_trusted_setups(preset_name: str) -> Tuple[list[str], list[str], list[str]]: - trusted_setups_file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/trusted_setup_4096.json' + trusted_setups_file_path = ( + str(Path(__file__).parent.parent) + + "/presets/" + + preset_name + + "/trusted_setups/trusted_setup_4096.json" + ) - with open(trusted_setups_file_path, 'r') as f: + with open(trusted_setups_file_path, "r") as f: json_data = json.load(f) - trusted_setup_G1_monomial = json_data['g1_monomial'] - trusted_setup_G1_lagrange = json_data['g1_lagrange'] - trusted_setup_G2_monomial = json_data['g2_monomial'] + trusted_setup_G1_monomial = json_data["g1_monomial"] + trusted_setup_G1_lagrange = json_data["g1_lagrange"] + trusted_setup_G2_monomial = json_data["g2_monomial"] return trusted_setup_G1_monomial, trusted_setup_G1_lagrange, trusted_setup_G2_monomial + @lru_cache(maxsize=None) def _load_curdleproofs_crs(preset_name: str) -> Dict[str, list[str]]: """ NOTE: File generated from https://github.com/asn-d6/curdleproofs/blob/8e8bf6d4191fb6a844002f75666fb7009716319b/tests/crs.rs#L53-L67 """ - file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/curdleproofs_crs.json' + file_path = ( + str(Path(__file__).parent.parent) + + "/presets/" + + preset_name + + "/trusted_setups/curdleproofs_crs.json" + ) - with open(file_path, 'r') as f: + with open(file_path, "r") as f: json_data = json.load(f) return json_data ALL_KZG_SETUPS = { - 'minimal': _load_kzg_trusted_setups('minimal'), - 'mainnet': _load_kzg_trusted_setups('mainnet') + "minimal": _load_kzg_trusted_setups("minimal"), + "mainnet": _load_kzg_trusted_setups("mainnet"), } ALL_CURDLEPROOFS_CRS = { - 'minimal': _load_curdleproofs_crs('minimal'), - 'mainnet': _load_curdleproofs_crs('mainnet'), + "minimal": _load_curdleproofs_crs("minimal"), + "mainnet": _load_curdleproofs_crs("mainnet"), } + @lru_cache(maxsize=None) -def _parse_value(name: str, typed_value: str, type_hint: Optional[str] = None) -> VariableDefinition: +def _parse_value( + name: str, typed_value: str, type_hint: Optional[str] = None +) -> VariableDefinition: comment = None if name in ("ROOT_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_REDUCED"): comment = "noqa: E501" typed_value = typed_value.strip() - if '(' not in typed_value: - return VariableDefinition(type_name=None, value=typed_value, comment=comment, type_hint=type_hint) - i = typed_value.index('(') + if "(" not in typed_value: + return VariableDefinition( + type_name=None, value=typed_value, comment=comment, type_hint=type_hint + ) + i = typed_value.index("(") type_name = typed_value[:i] - return VariableDefinition(type_name=type_name, value=typed_value[i+1:-1], comment=comment, type_hint=type_hint) + return VariableDefinition( + type_name=type_name, value=typed_value[i + 1 : -1], comment=comment, type_hint=type_hint + ) def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name): comment = "noqa: E501" kzg_setups = ALL_KZG_SETUPS[preset_name] - preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'] = VariableDefinition( - preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'].value, - str(kzg_setups[0]), - comment, None + preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"] = VariableDefinition( + preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str(kzg_setups[0]), comment, None ) - preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'] = VariableDefinition( - preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'].value, - str(kzg_setups[1]), - comment, None + preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"] = VariableDefinition( + preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str(kzg_setups[1]), comment, None ) - constant_vars['KZG_SETUP_G2_MONOMIAL'] = VariableDefinition( - constant_vars['KZG_SETUP_G2_MONOMIAL'].value, - str(kzg_setups[2]), - comment, None + constant_vars["KZG_SETUP_G2_MONOMIAL"] = VariableDefinition( + constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str(kzg_setups[2]), comment, None ) -def _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_constant_vars, preset_name): +def _update_constant_vars_with_curdleproofs_crs( + constant_vars, preset_dep_constant_vars, preset_name +): comment = "noqa: E501" - constant_vars['CURDLEPROOFS_CRS'] = VariableDefinition( + constant_vars["CURDLEPROOFS_CRS"] = VariableDefinition( + None, + "curdleproofs.CurdleproofsCrs.from_json(json.dumps(" + + str(ALL_CURDLEPROOFS_CRS[str(preset_name)]).replace("0x", "") + + "))", + comment, None, - 'curdleproofs.CurdleproofsCrs.from_json(json.dumps(' + str(ALL_CURDLEPROOFS_CRS[str(preset_name)]).replace('0x', '') + '))', - comment, None ) @@ -588,7 +624,9 @@ def parse_markdown(content: str) -> Document: return gfm.parse(content) -def check_yaml_matches_spec(var_name: str, yaml: Dict[str, str], value_def: VariableDefinition) -> None: +def check_yaml_matches_spec( + var_name: str, yaml: Dict[str, str], value_def: VariableDefinition +) -> None: """ This function performs a sanity check for presets & configs. To a certain degree, it ensures that the values in the specifications match those in the yaml files. @@ -604,17 +642,22 @@ def check_yaml_matches_spec(var_name: str, yaml: Dict[str, str], value_def: Vari if var in updated_value: updated_value = updated_value.replace(var, yaml[var]) try: - assert yaml[var_name] == repr(eval(updated_value)), \ - f"mismatch for {var_name}: {yaml[var_name]} vs {eval(updated_value)}" + assert yaml[var_name] == repr( + eval(updated_value) + ), f"mismatch for {var_name}: {yaml[var_name]} vs {eval(updated_value)}" except NameError: # Okay it's probably something more serious, let's ignore pass + def _has_decorator(decorateable: ast.ClassDef | ast.FunctionDef, name: str) -> bool: return any(_is_decorator(d, name) for d in decorateable.decorator_list) + def _is_decorator(decorator: ast.expr, name: str) -> bool: - return (isinstance(decorator, ast.Name) and decorator.id == name) or \ - (isinstance(decorator, ast.Attribute) and decorator.attr == name) or \ - (isinstance(decorator, ast.Call) and decorator.func.id == name) or \ - (isinstance(decorator, ast.Subscript) and decorator.value.id == name) + return ( + (isinstance(decorator, ast.Name) and decorator.id == name) + or (isinstance(decorator, ast.Attribute) and decorator.attr == name) + or (isinstance(decorator, ast.Call) and decorator.func.id == name) + or (isinstance(decorator, ast.Subscript) and decorator.value.id == name) + ) From a003052939226ea870c2f31010e39301c8802f7f Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Thu, 22 May 2025 05:11:50 +0000 Subject: [PATCH 11/18] Remove blank line --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 1c8e12b5bd..5db4ab1858 100644 --- a/setup.py +++ b/setup.py @@ -682,4 +682,3 @@ def run(self): py_modules=["eth2spec"], cmdclass=commands, ) - From 7d660336721200063a54f057a14649ed7adda846 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Thu, 22 May 2025 07:17:01 +0000 Subject: [PATCH 12/18] Fix docstrings and some typing --- pysetup/md_to_spec.py | 116 ++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 73 deletions(-) diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py index d5afe7e263..69c2fecfc4 100644 --- a/pysetup/md_to_spec.py +++ b/pysetup/md_to_spec.py @@ -4,7 +4,7 @@ import string from functools import lru_cache from pathlib import Path -from typing import cast, Dict, Iterator, Optional, Tuple +from typing import cast, Dict, Iterator, Mapping, Optional, Tuple from marko.block import BlankLine, Document, FencedCode, Heading, HTMLBlock from marko.element import Element @@ -19,10 +19,13 @@ class MarkdownToSpec: def __init__( self, file_name: Path, - preset: Dict[str, str], - config: Dict[str, str | Dict[str, str]], + preset: dict[str, str], + config: dict[str, str | list[dict[str, str]]], preset_name: str, ): + """ + Initializes the MarkdownToSpec instance. + """ self.preset = preset self.config = config self.preset_name = preset_name @@ -50,12 +53,7 @@ def __init__( def run(self) -> SpecObject: """ - Orchestrates the parsing and processing of the markdown spec file. - - Calls _parse_document() - - Iterates over self.document.children and processes each child - - Calls _finalize_types() and _build_spec_object() after processing - Returns: - SpecObject: The constructed specification object. + Parses the markdown spec file and returns the SpecObject. """ while (child := self._get_next_element()) is not None: self._process_child(child) @@ -64,8 +62,7 @@ def run(self) -> SpecObject: def _get_next_element(self) -> Optional[Element]: """ - Returns the next element in the document. - If the end of the document is reached, returns None. + Returns the next non-blank element in the document. """ try: while isinstance(result := next(self.document_iterator), BlankLine): @@ -82,14 +79,15 @@ def _skip_element(self) -> None: def _parse_document(self, file_name: Path) -> Iterator[Element]: """ - Opens the markdown file, parses its content into a document object using _parse_markdown, - and stores the parsed document in self.document. + Parses the markdown file into document elements. """ with open(file_name) as source_file: document = parse_markdown(source_file.read()) return iter(document.children) def _process_child(self, child: Element) -> None: + """Processes a child Markdown element by dispatching to the appropriate handler based on its type.""" + # Skip blank lines if isinstance(child, BlankLine): return @@ -113,10 +111,8 @@ def _process_heading(self, heading: Heading) -> None: def _process_code_block(self, code_block: FencedCode) -> None: """ - Processes a FencedCode block: - - Checks if the code block is Python. + Processes a FencedCode block, ignoring non-Python code. - Extracts source code and determines if it is a function, dataclass, or class. - - Updates the appropriate dictionary (functions, protocols, dataclasses, ssz_objects). """ if code_block.lang != "python": return @@ -139,14 +135,8 @@ def _process_code_block(self, code_block: FencedCode) -> None: def _process_code_def(self, source: str, fn: ast.FunctionDef) -> None: """ - Processes a function definition node from the AST and stores its source code representation. - If the function is a method (i.e., has a self type), it is added to the protocol functions for that type. - Otherwise, it is stored as a standalone function. - Args: - source (str): The source code of the function definition. - fn (ast.FunctionDef): The AST node representing the function definition. + Processes a function definition and stores it in the spec. """ - self_type_name = _get_self_type_from_source(fn) if self_type_name is None: @@ -160,7 +150,6 @@ def _add_protocol_function( """ Adds a function definition to the protocol functions dictionary. """ - if protocol_name not in self.spec["protocols"]: self.spec["protocols"][protocol_name] = ProtocolDefinition(functions={}) self.spec["protocols"][protocol_name].functions[function_name] = function_def @@ -170,18 +159,8 @@ def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None: def _process_code_class(self, source: str, cls: ast.ClassDef) -> None: """ - Processes an AST class definition node, validates its consistency with the current heading, - and updates the spec dictionary with the class source code. - Args: - source (str): The source code of the class. - cls (ast.ClassDef): The AST node representing the class definition. - Raises: - Exception: If the class name does not match the current heading name. - AssertionError: If the parent class is not 'Container' when a parent class is present. - Side Effects: - Updates self.spec["ssz_objects"] with the class source code, keyed by class name. + Processes a class definition and updates the spec. """ - class_name, parent_class = _get_class_info_from_ast(cls) # check consistency with spec @@ -194,13 +173,8 @@ def _process_code_class(self, source: str, cls: ast.ClassDef) -> None: def _process_table(self, table: Table) -> None: """ - Handles standard tables (not list-of-records). - Iterates over rows, extracting variable names, values, and descriptions. - Determines if the variable is a constant, preset, config, or custom type. - Updates the corresponding dictionaries. - Handles special cases for predefined types and function-dependent presets. + Processes a table and updates the spec with its data. """ - for row in cast(list[TableRow], table.children): if len(row.children) < 2: continue @@ -245,9 +219,13 @@ def _process_table(self, table: Table) -> None: if self.preset_name == "mainnet": check_yaml_matches_spec(name, self.config, value_def) - self.spec["config_vars"][name] = VariableDefinition( - value_def.type_name, self.config[name], value_def.comment, None - ) + config_value = self.config[name] + if isinstance(config_value, str): + self.spec["config_vars"][name] = VariableDefinition( + value_def.type_name, config_value, value_def.comment, None + ) + else: + raise ValueError(f"Variable {name} should be a string in the config file.") # It is a constant variable or a preset_dep_constant_vars else: @@ -265,7 +243,6 @@ def _process_table(self, table: Table) -> None: def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]: """ Extracts the name, value, and description fields from a table row element. - Description can be None. """ cells = cast(list[TableCell], row.children) name_cell = cells[0] @@ -292,9 +269,6 @@ def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]: def _process_list_of_records_table(self, table: Table, list_of_records_name: str) -> None: """ Handles tables marked as 'list-of-records'. - Extracts headers and rows, mapping field names and types. - Applies type mapping to config entries. - Validates or updates the config variable as needed based on preset_name. Updates config_vars with the processed list. Example of input: @@ -374,7 +348,11 @@ def _extract_typed_records_config( Returns a new list of dicts with types applied. """ list_of_records_config_file: list[dict[str, str]] = [] - for entry in self.config[list_of_records_name]: + entries = self.config[list_of_records_name] + if not isinstance(entries, list): + raise ValueError(f"Expected a dict for {list_of_records_name} in config file") + + for entry in entries: new_entry = {} for k, v in entry.items(): ctor = type_map.get(k) @@ -390,7 +368,6 @@ def _process_html_block(self, html: HTMLBlock) -> None: Handles HTML comments for skip logic and list-of-records detection. Sets flags or state variables for the next iteration. """ - body = html.body.strip() # This comment marks that we should skip the next element @@ -423,7 +400,7 @@ def _finalize_types(self) -> None: # Update CURDLEPROOFS CRS if needed if any("CURDLEPROOFS_CRS" in name for name in self.spec["constant_vars"]): _update_constant_vars_with_curdleproofs_crs( - self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name + self.spec["constant_vars"], self.preset_name ) # Split all_custom_types into custom_types and preset_dep_custom_types @@ -470,14 +447,6 @@ def _get_source_from_code_block(block: FencedCode) -> str: return block.children[0].children.strip() -@lru_cache(maxsize=None) -def _get_function_name_from_source(source: str) -> str: - fn = ast.parse(source).body[0] - if not isinstance(fn, ast.FunctionDef): - raise Exception("expected function definition") - return fn.name - - @lru_cache(maxsize=None) def _get_self_type_from_source(fn: ast.FunctionDef) -> Optional[str]: args = fn.args.args @@ -508,16 +477,8 @@ def _get_class_info_from_ast(cls: ast.ClassDef) -> Tuple[str, Optional[str]]: @lru_cache(maxsize=None) def _is_constant_id(name: str) -> bool: """ - Check if the given name follows the convention for constant identifiers. - A valid constant identifier must: - - Start with an uppercase ASCII letter or an underscore ('_'). - - All subsequent characters (if any) must be uppercase ASCII letters, underscores, or digits. - Args: - name (str): The identifier name to check. - Returns: - bool: True if the name is a valid constant identifier, False otherwise. + Checks if the given name follows the convention for constant identifiers. """ - if name[0] not in string.ascii_uppercase + "_": return False return all(map(lambda c: c in string.ascii_uppercase + "_" + string.digits, name[1:])) @@ -591,7 +552,11 @@ def _parse_value( ) -def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name): +def _update_constant_vars_with_kzg_setups( + constant_vars: dict[str, VariableDefinition], + preset_dep_constant_vars: dict[str, VariableDefinition], + preset_name: str, +) -> None: comment = "noqa: E501" kzg_setups = ALL_KZG_SETUPS[preset_name] preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"] = VariableDefinition( @@ -606,8 +571,8 @@ def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_var def _update_constant_vars_with_curdleproofs_crs( - constant_vars, preset_dep_constant_vars, preset_name -): + constant_vars: dict[str, VariableDefinition], preset_name: str +) -> None: comment = "noqa: E501" constant_vars["CURDLEPROOFS_CRS"] = VariableDefinition( None, @@ -625,7 +590,7 @@ def parse_markdown(content: str) -> Document: def check_yaml_matches_spec( - var_name: str, yaml: Dict[str, str], value_def: VariableDefinition + var_name: str, yaml: Mapping[str, str | list[dict[str, str]]], value_def: VariableDefinition ) -> None: """ This function performs a sanity check for presets & configs. To a certain degree, it ensures @@ -640,7 +605,12 @@ def check_yaml_matches_spec( updated_value = value_def.value for var in sorted(yaml.keys(), reverse=True): if var in updated_value: - updated_value = updated_value.replace(var, yaml[var]) + value = yaml[var] + if isinstance(value, str): + updated_value = updated_value.replace(var, value) + + else: + raise ValueError(f"Variable {var} should be a string in the yaml file.") try: assert yaml[var_name] == repr( eval(updated_value) From 9ea94a7148f328063828999114da64db392eb36c Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 23 May 2025 11:25:34 +0000 Subject: [PATCH 13/18] Restore makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 13098cd91b..16ce32e45a 100644 --- a/Makefile +++ b/Makefile @@ -249,4 +249,4 @@ kzg_setups: pyspec # Delete all untracked files. clean: - rm -fR venv .mypy_cache build eth2spec.egg-info pysetup/__pycache__ pysetup/spec_builders/__pycache__ + @git clean -fdX From 035a3164fc4a83188ceb6907a0067cd521a446db Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 23 May 2025 11:28:29 +0000 Subject: [PATCH 14/18] Add fruits --- pysetup/md_to_spec.py | 48 ++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py index 69c2fecfc4..dc82998bdf 100644 --- a/pysetup/md_to_spec.py +++ b/pysetup/md_to_spec.py @@ -48,7 +48,8 @@ def __init__( self.all_custom_types: Dict[str, str] = {} - self.document_iterator: Iterator[Element] = self._parse_document(file_name) + self.document_iterator: Iterator[Element] = self._parse_document( + file_name) self.current_heading_name: str | None = None def run(self) -> SpecObject: @@ -151,7 +152,8 @@ def _add_protocol_function( Adds a function definition to the protocol functions dictionary. """ if protocol_name not in self.spec["protocols"]: - self.spec["protocols"][protocol_name] = ProtocolDefinition(functions={}) + self.spec["protocols"][protocol_name] = ProtocolDefinition( + functions={}) self.spec["protocols"][protocol_name].functions[function_name] = function_def def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None: @@ -165,7 +167,8 @@ def _process_code_class(self, source: str, cls: ast.ClassDef) -> None: # check consistency with spec if class_name != self.current_heading_name: - raise Exception(f"class_name {class_name} != current_name {self.current_heading_name}") + raise Exception( + f"class_name {class_name} != current_name {self.current_heading_name}") if parent_class: assert parent_class == "Container" @@ -189,7 +192,8 @@ def _process_table(self, table: Table) -> None: if not _is_constant_id(name): # Check for short type declarations if value.startswith( - ("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector") + ("uint", "Bytes", "ByteList", "Union", + "Vector", "List", "ByteVector") ): self.all_custom_types[name] = value continue @@ -225,7 +229,8 @@ def _process_table(self, table: Table) -> None: value_def.type_name, config_value, value_def.comment, None ) else: - raise ValueError(f"Variable {name} should be a string in the config file.") + raise ValueError( + f"Variable {name} should be a string in the config file.") # It is a constant variable or a preset_dep_constant_vars else: @@ -272,10 +277,11 @@ def _process_list_of_records_table(self, table: Table, list_of_records_name: str Updates config_vars with the processed list. Example of input: - | Epoch | Max Blobs Per Block | Description | - | --------------------------- | ------------------- | -------------------------------- | - | `Epoch(269568)` **Deneb** | `uint64(6)` | The limit is set to `6` blobs | - | `Epoch(364032)` **Electra** | `uint64(9)` | The limit is raised to `9` blobs | + | Name | Calories | Description | + | ------ | ------------- | ------------- | + | Apple | `uint64(96)` | 5.3oz serving | + | Orange | `uint64(75)` | 5.6oz serving | + | Banana | `uint64(111)` | 4.4oz serving | The method _process_html_block calls this method when it encounters a comment of the form ``. @@ -350,7 +356,8 @@ def _extract_typed_records_config( list_of_records_config_file: list[dict[str, str]] = [] entries = self.config[list_of_records_name] if not isinstance(entries, list): - raise ValueError(f"Expected a dict for {list_of_records_name} in config file") + raise ValueError( + f"Expected a dict for {list_of_records_name} in config file") for entry in entries: new_entry = {} @@ -377,14 +384,16 @@ def _process_html_block(self, html: HTMLBlock) -> None: # Handle list-of-records tables # This comment marks that the next table is a list-of-records # e.g. - match = re.match(r"", body) + match = re.match( + r"", body) if match: table_element = self._get_next_element() if not isinstance(table_element, Table): raise Exception( f"expected table after list-of-records comment, got {type(table_element)}" ) - self._process_list_of_records_table(table_element, match.group(1).upper()) + self._process_list_of_records_table( + table_element, match.group(1).upper()) def _finalize_types(self) -> None: """ @@ -548,7 +557,8 @@ def _parse_value( type_name = typed_value[:i] return VariableDefinition( - type_name=type_name, value=typed_value[i + 1 : -1], comment=comment, type_hint=type_hint + type_name=type_name, value=typed_value[i + + 1: -1], comment=comment, type_hint=type_hint ) @@ -560,13 +570,16 @@ def _update_constant_vars_with_kzg_setups( comment = "noqa: E501" kzg_setups = ALL_KZG_SETUPS[preset_name] preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"] = VariableDefinition( - preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str(kzg_setups[0]), comment, None + preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str( + kzg_setups[0]), comment, None ) preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"] = VariableDefinition( - preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str(kzg_setups[1]), comment, None + preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str( + kzg_setups[1]), comment, None ) constant_vars["KZG_SETUP_G2_MONOMIAL"] = VariableDefinition( - constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str(kzg_setups[2]), comment, None + constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str( + kzg_setups[2]), comment, None ) @@ -610,7 +623,8 @@ def check_yaml_matches_spec( updated_value = updated_value.replace(var, value) else: - raise ValueError(f"Variable {var} should be a string in the yaml file.") + raise ValueError( + f"Variable {var} should be a string in the yaml file.") try: assert yaml[var_name] == repr( eval(updated_value) From 3baab739e447c7d1ceba09f032b3510a1f2c1aa2 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Sat, 24 May 2025 05:55:08 +0000 Subject: [PATCH 15/18] Fix parameter for git clean --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 16ce32e45a..f982fcf15d 100644 --- a/Makefile +++ b/Makefile @@ -249,4 +249,4 @@ kzg_setups: pyspec # Delete all untracked files. clean: - @git clean -fdX + @git clean -fdx From 9f4ab4a14af0cec976a2e6d491de7307210093e4 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Sat, 24 May 2025 05:58:11 +0000 Subject: [PATCH 16/18] Linter --- pysetup/md_to_spec.py | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py index dc82998bdf..706bf3f27f 100644 --- a/pysetup/md_to_spec.py +++ b/pysetup/md_to_spec.py @@ -48,8 +48,7 @@ def __init__( self.all_custom_types: Dict[str, str] = {} - self.document_iterator: Iterator[Element] = self._parse_document( - file_name) + self.document_iterator: Iterator[Element] = self._parse_document(file_name) self.current_heading_name: str | None = None def run(self) -> SpecObject: @@ -152,8 +151,7 @@ def _add_protocol_function( Adds a function definition to the protocol functions dictionary. """ if protocol_name not in self.spec["protocols"]: - self.spec["protocols"][protocol_name] = ProtocolDefinition( - functions={}) + self.spec["protocols"][protocol_name] = ProtocolDefinition(functions={}) self.spec["protocols"][protocol_name].functions[function_name] = function_def def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None: @@ -167,8 +165,7 @@ def _process_code_class(self, source: str, cls: ast.ClassDef) -> None: # check consistency with spec if class_name != self.current_heading_name: - raise Exception( - f"class_name {class_name} != current_name {self.current_heading_name}") + raise Exception(f"class_name {class_name} != current_name {self.current_heading_name}") if parent_class: assert parent_class == "Container" @@ -192,8 +189,7 @@ def _process_table(self, table: Table) -> None: if not _is_constant_id(name): # Check for short type declarations if value.startswith( - ("uint", "Bytes", "ByteList", "Union", - "Vector", "List", "ByteVector") + ("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector") ): self.all_custom_types[name] = value continue @@ -229,8 +225,7 @@ def _process_table(self, table: Table) -> None: value_def.type_name, config_value, value_def.comment, None ) else: - raise ValueError( - f"Variable {name} should be a string in the config file.") + raise ValueError(f"Variable {name} should be a string in the config file.") # It is a constant variable or a preset_dep_constant_vars else: @@ -356,8 +351,7 @@ def _extract_typed_records_config( list_of_records_config_file: list[dict[str, str]] = [] entries = self.config[list_of_records_name] if not isinstance(entries, list): - raise ValueError( - f"Expected a dict for {list_of_records_name} in config file") + raise ValueError(f"Expected a dict for {list_of_records_name} in config file") for entry in entries: new_entry = {} @@ -384,16 +378,14 @@ def _process_html_block(self, html: HTMLBlock) -> None: # Handle list-of-records tables # This comment marks that the next table is a list-of-records # e.g. - match = re.match( - r"", body) + match = re.match(r"", body) if match: table_element = self._get_next_element() if not isinstance(table_element, Table): raise Exception( f"expected table after list-of-records comment, got {type(table_element)}" ) - self._process_list_of_records_table( - table_element, match.group(1).upper()) + self._process_list_of_records_table(table_element, match.group(1).upper()) def _finalize_types(self) -> None: """ @@ -557,8 +549,7 @@ def _parse_value( type_name = typed_value[:i] return VariableDefinition( - type_name=type_name, value=typed_value[i + - 1: -1], comment=comment, type_hint=type_hint + type_name=type_name, value=typed_value[i + 1 : -1], comment=comment, type_hint=type_hint ) @@ -570,16 +561,13 @@ def _update_constant_vars_with_kzg_setups( comment = "noqa: E501" kzg_setups = ALL_KZG_SETUPS[preset_name] preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"] = VariableDefinition( - preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str( - kzg_setups[0]), comment, None + preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str(kzg_setups[0]), comment, None ) preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"] = VariableDefinition( - preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str( - kzg_setups[1]), comment, None + preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str(kzg_setups[1]), comment, None ) constant_vars["KZG_SETUP_G2_MONOMIAL"] = VariableDefinition( - constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str( - kzg_setups[2]), comment, None + constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str(kzg_setups[2]), comment, None ) @@ -623,8 +611,7 @@ def check_yaml_matches_spec( updated_value = updated_value.replace(var, value) else: - raise ValueError( - f"Variable {var} should be a string in the yaml file.") + raise ValueError(f"Variable {var} should be a string in the yaml file.") try: assert yaml[var_name] == repr( eval(updated_value) From 6595bdbf5b11f15dc486a0bdccf8a6009907aba3 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Tue, 27 May 2025 06:18:53 +0000 Subject: [PATCH 17/18] Reorder __init__.py --- pysetup/md_to_spec.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py index 706bf3f27f..be0b3fc323 100644 --- a/pysetup/md_to_spec.py +++ b/pysetup/md_to_spec.py @@ -30,6 +30,10 @@ def __init__( self.config = config self.preset_name = preset_name + self.document_iterator: Iterator[Element] = self._parse_document(file_name) + self.all_custom_types: Dict[str, str] = {} + self.current_heading_name: str | None = None + # Use a single dict to hold all SpecObject fields self.spec: dict[str, dict] = { "config_vars": {}, @@ -46,11 +50,6 @@ def __init__( "ssz_objects": {}, } - self.all_custom_types: Dict[str, str] = {} - - self.document_iterator: Iterator[Element] = self._parse_document(file_name) - self.current_heading_name: str | None = None - def run(self) -> SpecObject: """ Parses the markdown spec file and returns the SpecObject. From c92d629e3b00d39690829db902b8a0ee9f22e3bb Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Wed, 28 May 2025 10:54:29 +0000 Subject: [PATCH 18/18] Remove old implementation --- pyproject.toml | 1 - setup.py | 402 +------------------------------------------------ 2 files changed, 6 insertions(+), 397 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c51111f779..83d5a46ddb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,6 @@ requires = [ "ruamel.yaml==0.18.10", "setuptools==80.7.1", "wheel==0.45.1", - "deepdiff==8.5.0", ] [project] diff --git a/setup.py b/setup.py index 5db4ab1858..2f52ccddee 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,6 @@ -import ast import copy -import json import logging import os -import re import string import sys import warnings @@ -20,8 +17,8 @@ from ruamel.yaml import YAML from setuptools import setup, find_packages, Command from setuptools.command.build_py import build_py -from typing import Dict, List, Sequence, Optional, Tuple -from deepdiff import DeepDiff +from typing import Dict, List, Sequence, Optional, Tuple, cast +from pysetup.md_to_spec import MarkdownToSpec pysetup_path = os.path.abspath(os.path.dirname(__file__)) sys.path.insert(0, pysetup_path) @@ -43,11 +40,8 @@ ) from pysetup.typing import ( BuildTarget, - ProtocolDefinition, SpecObject, - VariableDefinition, ) -from pysetup.md_to_spec import MarkdownToSpec # Ignore '1.5.0-alpha.*' to '1.5.0a*' messages. warnings.filterwarnings('ignore', message='Normalizing .* to .*') @@ -59,390 +53,9 @@ def filter(self, record): logging.getLogger().addFilter(PyspecFilter()) -@lru_cache(maxsize=None) -def _get_name_from_heading(heading: Heading) -> Optional[str]: - last_child = heading.children[-1] - if isinstance(last_child, CodeSpan): - return last_child.children - return None - - -@lru_cache(maxsize=None) -def _get_source_from_code_block(block: FencedCode) -> str: - return block.children[0].children.strip() - - -@lru_cache(maxsize=None) -def _get_function_name_from_source(source: str) -> str: - fn = ast.parse(source).body[0] - return fn.name - - -@lru_cache(maxsize=None) -def _get_self_type_from_source(source: str) -> Optional[str]: - fn = ast.parse(source).body[0] - args = fn.args.args - if len(args) == 0: - return None - if args[0].arg != 'self': - return None - if args[0].annotation is None: - return None - return args[0].annotation.id - - -@lru_cache(maxsize=None) -def _get_class_info_from_source(source: str) -> Tuple[str, Optional[str]]: - class_def = ast.parse(source).body[0] - base = class_def.bases[0] - if isinstance(base, ast.Name): - parent_class = base.id - elif isinstance(base, ast.Subscript): - parent_class = base.value.id - else: - # NOTE: SSZ definition derives from earlier phase... - # e.g. `phase0.SignedBeaconBlock` - # TODO: check for consistency with other phases - parent_class = None - return class_def.name, parent_class - - -@lru_cache(maxsize=None) -def _is_constant_id(name: str) -> bool: - if name[0] not in string.ascii_uppercase + '_': - return False - return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:])) - - -@lru_cache(maxsize=None) -def _load_kzg_trusted_setups(preset_name): - trusted_setups_file_path = str(Path(__file__).parent) + '/presets/' + preset_name + '/trusted_setups/trusted_setup_4096.json' - - with open(trusted_setups_file_path, 'r') as f: - json_data = json.load(f) - trusted_setup_G1_monomial = json_data['g1_monomial'] - trusted_setup_G1_lagrange = json_data['g1_lagrange'] - trusted_setup_G2_monomial = json_data['g2_monomial'] - - return trusted_setup_G1_monomial, trusted_setup_G1_lagrange, trusted_setup_G2_monomial - -@lru_cache(maxsize=None) -def _load_curdleproofs_crs(preset_name): - """ - NOTE: File generated from https://github.com/asn-d6/curdleproofs/blob/8e8bf6d4191fb6a844002f75666fb7009716319b/tests/crs.rs#L53-L67 - """ - file_path = str(Path(__file__).parent) + '/presets/' + preset_name + '/trusted_setups/curdleproofs_crs.json' - - with open(file_path, 'r') as f: - json_data = json.load(f) - - return json_data - - -ALL_KZG_SETUPS = { - 'minimal': _load_kzg_trusted_setups('minimal'), - 'mainnet': _load_kzg_trusted_setups('mainnet') -} - -ALL_CURDLEPROOFS_CRS = { - 'minimal': _load_curdleproofs_crs('minimal'), - 'mainnet': _load_curdleproofs_crs('mainnet'), -} - - -@lru_cache(maxsize=None) -def _parse_value(name: str, typed_value: str, type_hint: Optional[str] = None) -> VariableDefinition: - comment = None - if name in ("ROOT_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_REDUCED"): - comment = "noqa: E501" - - typed_value = typed_value.strip() - if '(' not in typed_value: - return VariableDefinition(type_name=None, value=typed_value, comment=comment, type_hint=type_hint) - i = typed_value.index('(') - type_name = typed_value[:i] - - return VariableDefinition(type_name=type_name, value=typed_value[i+1:-1], comment=comment, type_hint=type_hint) - - -def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name): - comment = "noqa: E501" - kzg_setups = ALL_KZG_SETUPS[preset_name] - preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'] = VariableDefinition( - preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'].value, - str(kzg_setups[0]), - comment, None - ) - preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'] = VariableDefinition( - preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'].value, - str(kzg_setups[1]), - comment, None - ) - constant_vars['KZG_SETUP_G2_MONOMIAL'] = VariableDefinition( - constant_vars['KZG_SETUP_G2_MONOMIAL'].value, - str(kzg_setups[2]), - comment, None - ) - - -def _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_constant_vars, preset_name): - comment = "noqa: E501" - constant_vars['CURDLEPROOFS_CRS'] = VariableDefinition( - None, - 'curdleproofs.CurdleproofsCrs.from_json(json.dumps(' + str(ALL_CURDLEPROOFS_CRS[str(preset_name)]).replace('0x', '') + '))', - comment, None - ) - - -@lru_cache(maxsize=None) -def parse_markdown(content: str): - return gfm.parse(content) - - -def check_yaml_matches_spec(var_name, yaml, value_def): - """ - This function performs a sanity check for presets & configs. To a certain degree, it ensures - that the values in the specifications match those in the yaml files. - """ - if var_name == "TERMINAL_BLOCK_HASH": - # This is just Hash32() in the specs, that's fine - return - - # We use a var in the definition of a new var, replace usages - # Reverse sort so that overridden values come first - updated_value = value_def.value - for var in sorted(yaml.keys(), reverse=True): - if var in updated_value: - updated_value = updated_value.replace(var, yaml[var]) - try: - assert yaml[var_name] == repr(eval(updated_value)), \ - f"mismatch for {var_name}: {yaml[var_name]} vs {eval(updated_value)}" - except NameError: - # Okay it's probably something more serious, let's ignore - pass - -def get_spec_new(file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name=str) -> SpecObject: +def get_spec(file_name: Path, preset: Dict[str, str], config: Dict[str, str | list[dict[str, str]]], preset_name: str) -> SpecObject: return MarkdownToSpec(file_name, preset, config, preset_name).run() -def get_spec(file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name=str) -> SpecObject: - functions: Dict[str, str] = {} - protocols: Dict[str, ProtocolDefinition] = {} - constant_vars: Dict[str, VariableDefinition] = {} - preset_dep_constant_vars: Dict[str, VariableDefinition] = {} - preset_vars: Dict[str, VariableDefinition] = {} - config_vars: Dict[str, VariableDefinition] = {} - ssz_dep_constants: Dict[str, str] = {} - func_dep_presets: Dict[str, str] = {} - ssz_objects: Dict[str, str] = {} - dataclasses: Dict[str, str] = {} - all_custom_types: Dict[str, str] = {} - - with open(file_name) as source_file: - document = parse_markdown(source_file.read()) - - current_name = None - should_skip = False - list_of_records = None - list_of_records_name = None - for child in document.children: - if isinstance(child, BlankLine): - continue - if should_skip: - should_skip = False - continue - if isinstance(child, Heading): - current_name = _get_name_from_heading(child) - elif isinstance(child, FencedCode): - if child.lang != "python": - continue - source = _get_source_from_code_block(child) - if source.startswith("def"): - current_name = _get_function_name_from_source(source) - self_type_name = _get_self_type_from_source(source) - function_def = "\n".join(line.rstrip() for line in source.splitlines()) - if self_type_name is None: - functions[current_name] = function_def - else: - if self_type_name not in protocols: - protocols[self_type_name] = ProtocolDefinition(functions={}) - protocols[self_type_name].functions[current_name] = function_def - elif source.startswith("@dataclass"): - dataclasses[ast.parse(source).body[0].name] = "\n".join(line.rstrip() for line in source.splitlines()) - elif source.startswith("class"): - class_name, parent_class = _get_class_info_from_source(source) - # check consistency with spec - try: - assert class_name == current_name - except Exception: - print('class_name', class_name) - print('current_name', current_name) - raise - - if parent_class: - assert parent_class == "Container" - # NOTE: trim whitespace from spec - ssz_objects[current_name] = "\n".join(line.rstrip() for line in source.splitlines()) - else: - raise Exception("unrecognized python code element: " + source) - elif isinstance(child, Table) and list_of_records is not None: - list_of_records_header = None - for i, row in enumerate(child.children): - # This will start as an empty list when there is a comment, - # which indicates that the next table is a list-of-records. After we're done parsing - # the table, we will reset this to None. - if list_of_records is not None: - if i == 0: - # Save the table header, this will be used for field names - # Skip the last item, which is the description - list_of_records_header = [ - # Convert the titles to SNAKE_CASE - re.sub(r'\s+', '_', value.children[0].children.upper()) - for value in row.children[:-1] - ] - else: - # Add the row entry to our list of records - list_of_records.append({ - list_of_records_header[i]: value.children[0].children - for i, value in enumerate(row.children[:-1]) - }) - - # Make a type map from the spec definition - # We'll apply this to the file config (ie mainnet.yaml) - type_map: dict[str,str] = {} - pattern = re.compile(r'^(\w+)\(.*\)$') - for entry in list_of_records: - for k, v in entry.items(): - m = pattern.match(v) - if m: - type_map[k] = m.group(1) - - # Apply the types to the file config - list_of_records_config: list[dict[str,str]] = [] - for entry in config[list_of_records_name]: - new_entry: dict[str,str] = {} - for k, v in entry.items(): - ctor = type_map.get(k) - if ctor: - new_entry[k] = f"{ctor}({v})" - else: - new_entry[k] = v - list_of_records_config.append(new_entry) - - # For mainnet, check that the spec config & file config are the same - # For minimal, we expect this to be different; just use the file config - if preset_name == "mainnet": - assert list_of_records == list_of_records_config, \ - f"list of records mismatch: {list_of_records} vs {list_of_records_config}" - elif preset_name == "minimal": - list_of_records = list_of_records_config - - # Set the config variable and reset the global variable - config_vars[list_of_records_name] = list_of_records - list_of_records = None - - elif isinstance(child, Table): - for row in child.children: - cells = row.children - if len(cells) >= 2: - name_cell = cells[0] - name = name_cell.children[0].children - - value_cell = cells[1] - value = value_cell.children[0].children - - description = None - if len(cells) >= 3: - description_cell = cells[2] - if len(description_cell.children) > 0: - description = description_cell.children[0].children - if isinstance(description, list): - # marko parses `**X**` as a list containing a X - description = description[0].children - - if isinstance(name, list): - # marko parses `[X]()` as a list containing a X - name = name[0].children - if isinstance(value, list): - # marko parses `**X**` as a list containing a X - value = value[0].children - - # Skip types that have been defined elsewhere - if description is not None and description.startswith(""): - continue - - if not _is_constant_id(name): - # Check for short type declarations - if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")): - all_custom_types[name] = value - continue - - if value.startswith("get_generalized_index"): - ssz_dep_constants[name] = value - continue - - if description is not None and description.startswith(""): - func_dep_presets[name] = value - - value_def = _parse_value(name, value) - if name in preset: - if preset_name == "mainnet": - check_yaml_matches_spec(name, preset, value_def) - preset_vars[name] = VariableDefinition(value_def.type_name, preset[name], value_def.comment, None) - elif name in config: - if preset_name == "mainnet": - check_yaml_matches_spec(name, config, value_def) - config_vars[name] = VariableDefinition(value_def.type_name, config[name], value_def.comment, None) - else: - if name in ('ENDIANNESS', 'KZG_ENDIANNESS'): - # Deal with mypy Literal typing check - value_def = _parse_value(name, value, type_hint='Final') - if any(k in value for k in preset) or any(k in value for k in preset_dep_constant_vars): - preset_dep_constant_vars[name] = value_def - else: - constant_vars[name] = value_def - - elif isinstance(child, HTMLBlock): - if child.body.strip() == "": - should_skip = True - # Handle list-of-records tables - match = re.match(r"", child.body.strip()) - if match: - # Initialize list-of-records, in the next iteration this will indicate that the - # table is a list-of-records and must be parsed differently. - list_of_records = [] - # Use regex to extract the desired configuration list name - list_of_records_name = match.group(1).upper() - - # Load KZG trusted setup from files - if any('KZG_SETUP' in name for name in constant_vars): - _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name) - - if any('CURDLEPROOFS_CRS' in name for name in constant_vars): - _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_constant_vars, preset_name) - - custom_types: Dict[str, str] = {} - preset_dep_custom_types: Dict[str, str] = {} - for name, value in all_custom_types.items(): - if any(k in value for k in preset) or any(k in value for k in preset_dep_constant_vars): - preset_dep_custom_types[name] = value - else: - custom_types[name] = value - - return SpecObject( - functions=functions, - protocols=protocols, - custom_types=custom_types, - preset_dep_custom_types=preset_dep_custom_types, - constant_vars=constant_vars, - preset_dep_constant_vars=preset_dep_constant_vars, - preset_vars=preset_vars, - config_vars=config_vars, - ssz_dep_constants=ssz_dep_constants, - func_dep_presets=func_dep_presets, - ssz_objects=ssz_objects, - dataclasses=dataclasses, - ) - @lru_cache(maxsize=None) def load_preset(preset_files: Sequence[Path]) -> Dict[str, str]: @@ -460,11 +73,11 @@ def load_preset(preset_files: Sequence[Path]) -> Dict[str, str]: raise Exception(f"duplicate config var(s) in preset files: {', '.join(duplicates)}") preset.update(fork_preset) assert preset != {} - return parse_config_vars(preset) + return cast(Dict[str, str], parse_config_vars(preset)) @lru_cache(maxsize=None) -def load_config(config_path: Path) -> Dict[str, str]: +def load_config(config_path: Path) -> Dict[str, str | List[Dict[str, str]]]: """ Loads the given configuration file. """ @@ -480,10 +93,7 @@ def build_spec(fork: str, config_file: Path) -> str: preset = load_preset(tuple(preset_files)) config = load_config(config_file) - all_specs = [get_spec_new(spec, preset, config, preset_name) for spec in source_files] - all_specs_old = [get_spec(spec, preset, config, preset_name) for spec in source_files] - - assert DeepDiff(all_specs, all_specs_old, ignore_order=True) == {}, f"specs differ: {DeepDiff(all_specs, all_specs_old, ignore_order=True)}" + all_specs = [get_spec(spec, preset, config, preset_name) for spec in source_files] spec_object = all_specs[0] for value in all_specs[1:]: