From e01a8004242a9e5f36107af1fb1c25d346dd9525 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Fri, 16 May 2025 03:05:50 +0000
Subject: [PATCH 01/18] Initial checkpoint

---
 Makefile              |   2 +-
 pysetup/mk_to_spec.py | 513 ++++++++++++++++++++++++++++++++++++++++++
 setup.py              |  11 +-
 3 files changed, 523 insertions(+), 3 deletions(-)
 create mode 100644 pysetup/mk_to_spec.py

diff --git a/Makefile b/Makefile
index 3366aca4e1..25be7bd587 100644
--- a/Makefile
+++ b/Makefile
@@ -275,4 +275,4 @@ kzg_setups: pyspec
 
 # Delete all untracked files.
 clean:
-	@git clean -fdx
+	rm -fR venv .mypy_cache  build eth2spec.egg-info pysetup/__pycache__ pysetup/spec_builders/__pycache__ 
diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py
new file mode 100644
index 0000000000..df97b86e3c
--- /dev/null
+++ b/pysetup/mk_to_spec.py
@@ -0,0 +1,513 @@
+import ast
+import json
+from pathlib import Path
+import string
+from typing import Dict, Optional, Tuple
+import re
+from functools import lru_cache
+
+
+from marko.block import BlankLine, Heading, FencedCode, HTMLBlock
+from marko.ext.gfm import gfm
+from marko.ext.gfm.elements import Table
+from marko.inline import CodeSpan
+
+from .typing import ProtocolDefinition, VariableDefinition, SpecObject
+
+class MarkdownToSpec:
+    def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name: str):
+        self.file_name = file_name
+        self.preset = preset
+        self.config = config
+        self.preset_name = preset_name
+
+        self.functions: Dict[str, str] = {}
+        self.protocols: Dict[str, ProtocolDefinition] = {}
+        self.constant_vars: Dict[str, VariableDefinition] = {}
+        self.preset_dep_constant_vars: Dict[str, VariableDefinition] = {}
+        self.preset_vars: Dict[str, VariableDefinition] = {}
+        self.config_vars: Dict[str, VariableDefinition] = {}
+        self.ssz_dep_constants: Dict[str, str] = {}
+        self.func_dep_presets: Dict[str, str] = {}
+        self.ssz_objects: Dict[str, str] = {}
+        self.dataclasses: Dict[str, str] = {}
+        self.all_custom_types: Dict[str, str] = {}
+        self.custom_types: Dict[str, str] = {}
+        self.preset_dep_custom_types: Dict[str, str] = {}
+
+        self.document = None
+        self.document_iterator = None
+        self.current_name = None
+        self.should_skip = False
+        self.list_of_records = None
+        self.list_of_records_name = None
+
+    def run(self) -> SpecObject:
+        """
+        Orchestrates the parsing and processing of the markdown spec file.
+        - Calls _parse_document()
+        - Iterates over self.document.children and processes each child
+        - Calls _finalize_types() and _build_spec_object() after processing
+        Returns:
+            SpecObject: The constructed specification object.
+        """
+        self._parse_document()
+        # self.document_iterator = iter(self.document.children)
+        # while (child := self._get_next_element()) is not None:
+        for child in self.document.children:
+            self._process_child(child)
+        self._finalize_types()
+        return self._build_spec_object()
+
+    def _get_next_element(self):
+        """
+        Returns the next element in the document.
+        If the end of the document is reached, returns None.
+        """
+
+        try:
+            # while isinstance(result := next(self.document_iterator), BlankLine):
+            #     pass
+            # return result
+            next(self.document_iterator)
+        except StopIteration:
+            return None
+
+    def _finalize_types(self):
+        """
+        Processes all_custom_types into custom_types and preset_dep_custom_types.
+        Calls helper functions to update KZG and CURDLEPROOFS setups if needed.
+        """
+        # Update KZG trusted setup if needed
+        if any('KZG_SETUP' in name for name in self.constant_vars):
+            _update_constant_vars_with_kzg_setups(
+                self.constant_vars, self.preset_dep_constant_vars, self.preset_name
+            )
+
+        # Update CURDLEPROOFS CRS if needed
+        if any('CURDLEPROOFS_CRS' in name for name in self.constant_vars):
+            _update_constant_vars_with_curdleproofs_crs(
+                self.constant_vars, self.preset_dep_constant_vars, self.preset_name
+            )
+
+        # Split all_custom_types into custom_types and preset_dep_custom_types
+        self.custom_types = {}
+        self.preset_dep_custom_types = {}
+        for name, value in self.all_custom_types.items():
+            if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars):
+                self.preset_dep_custom_types[name] = value
+            else:
+                self.custom_types[name] = value
+
+    def _parse_document(self):
+        """
+        Opens the markdown file, parses its content into a document object using _parse_markdown,
+        and stores the parsed document in self.document.
+        """
+        with open(self.file_name) as source_file:
+            self.document = parse_markdown(source_file.read())
+
+    def _process_child(self, child):
+        # Skip blank lines
+        if isinstance(child, BlankLine):
+            return
+
+        if self.should_skip:
+            self.should_skip = False
+            return
+
+            # Dispatch to the correct handler
+        if isinstance(child, Heading):
+            self._process_heading(child)
+        elif isinstance(child, FencedCode):
+            self._process_code_block(child)
+        elif isinstance(child, Table):
+            # Handler for list-of-records is managed by state in _process_html_block
+            if self.list_of_records is not None:
+                self._process_list_of_records_table(child)
+            else:
+                self._process_table(child)
+        elif isinstance(child, HTMLBlock):
+            self._process_html_block(child)
+
+    def _process_heading(self, child):
+        """
+        Extracts the section name from the heading and updates current_name for context.
+        """
+        if not isinstance(child, Heading):
+            return
+        self.current_name = _get_name_from_heading(child)
+        # else: skip unknown types
+
+    def _process_code_block(self, child):
+        """
+        Processes a FencedCode block:
+        - Checks if the code block is Python.
+        - Extracts source code and determines if it is a function, dataclass, or class.
+        - Updates the appropriate dictionary (functions, protocols, dataclasses, ssz_objects).
+        """
+        if child.lang != "python":
+            return
+
+        source = _get_source_from_code_block(child)
+
+        if source.startswith("def"):
+            self._process_code_def(source)
+        elif source.startswith("@dataclass"):
+            self._process_code_dataclass(source)
+        elif source.startswith("class"):
+            self._process_code_class(source)
+        else:
+            raise Exception("unrecognized python code element: " + source)
+
+    def _process_code_def(self, source):
+        self.current_name = _get_function_name_from_source(source)
+        self_type_name = _get_self_type_from_source(source)
+        function_def = "\n".join(line.rstrip() for line in source.splitlines())
+        if self_type_name is None:
+            self.functions[self.current_name] = function_def
+        else:
+            if self_type_name not in self.protocols:
+                self.protocols[self_type_name] = ProtocolDefinition(
+                    functions={})
+            self.protocols[self_type_name].functions[self.current_name] = function_def
+
+    def _process_code_dataclass(self, source):
+        """ if self.current_name is None:
+            raise Exception(f"found @dataclass without a name: {source}")"""
+        self.dataclasses[self.current_name] = "\n".join(
+            line.rstrip() for line in source.splitlines())
+
+    def _process_code_class(self, source):
+        class_name, parent_class = _get_class_info_from_source(source)
+        # check consistency with spec
+        if class_name != self.current_name:
+            raise Exception(
+                f"class_name {class_name} != current_name {self.current_name}")
+
+        if parent_class:
+            assert parent_class == "Container"
+        self.ssz_objects[self.current_name] = "\n".join(
+            line.rstrip() for line in source.splitlines())
+
+    def _process_table(self, child):
+        """
+        Handles standard tables (not list-of-records).
+        Iterates over rows, extracting variable names, values, and descriptions.
+        Determines if the variable is a constant, preset, config, or custom type.
+        Updates the corresponding dictionaries.
+        Handles special cases for predefined types and function-dependent presets.
+        """
+
+        for row in child.children:
+            cells = row.children
+            if len(cells) >= 2:
+                name_cell = cells[0]
+                name = name_cell.children[0].children
+
+                value_cell = cells[1]
+                value = value_cell.children[0].children
+
+                description = None
+                if len(cells) >= 3:
+                    description_cell = cells[2]
+                    if len(description_cell.children) > 0:
+                        description = description_cell.children[0].children
+                        if isinstance(description, list):
+                            description = description[0].children
+
+                if isinstance(name, list):
+                    name = name[0].children
+                if isinstance(value, list):
+                    value = value[0].children
+
+                # Skip types that have been defined elsewhere
+                if description is not None and description.startswith("<!-- predefined-type -->"):
+                    continue
+
+                if not _is_constant_id(name):
+                    # Check for short type declarations
+                    if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")):
+                        self.all_custom_types[name] = value
+                    continue
+
+                if value.startswith("get_generalized_index"):
+                    self.ssz_dep_constants[name] = value
+                    continue
+
+                if description is not None and description.startswith("<!-- predefined -->"):
+                    self.func_dep_presets[name] = value
+
+                value_def = _parse_value(name, value)
+                if name in self.preset:
+                    if self.preset_name == "mainnet":
+                        check_yaml_matches_spec(
+                            name, self.preset, value_def)
+                    self.preset_vars[name] = VariableDefinition(
+                        value_def.type_name, self.preset[name], value_def.comment, None)
+                elif name in self.config:
+                    if self.preset_name == "mainnet":
+                        check_yaml_matches_spec(
+                            name, self.config, value_def)
+                    self.config_vars[name] = VariableDefinition(
+                        value_def.type_name, self.config[name], value_def.comment, None)
+                else:
+                    if name in ('ENDIANNESS', 'KZG_ENDIANNESS'):
+                        # Deal with mypy Literal typing check
+                        value_def = _parse_value(
+                            name, value, type_hint='Final')
+                    if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars):
+                        self.preset_dep_constant_vars[name] = value_def
+                    else:
+                        self.constant_vars[name] = value_def
+
+    def _process_list_of_records_table(self, child):
+        """
+        Handles tables marked as 'list-of-records'.
+        Extracts headers and rows, mapping field names and types.
+        Applies type mapping to config entries.
+        Validates or updates the config variable as needed based on preset_name.
+        Updates config_vars with the processed list.
+        """
+
+        list_of_records_header = None
+        for i, row in enumerate(child.children):
+            if i == 0:
+                # Save the table header, used for field names (skip last item: description)
+                list_of_records_header = [
+                    re.sub(r'\s+', '_', value.children[0].children.upper())
+                    for value in row.children[:-1]
+                ]
+            else:
+                # Add the row entry to our list of records
+                self.list_of_records.append({
+                    list_of_records_header[j]: value.children[0].children
+                    for j, value in enumerate(row.children[:-1])
+                })
+
+        # Make a type map from the spec definition
+        type_map: dict[str, str] = {}
+        pattern = re.compile(r'^(\w+)\(.*\)$')
+        for entry in self.list_of_records:
+            for k, v in entry.items():
+                m = pattern.match(v)
+                if m:
+                    type_map[k] = m.group(1)
+
+        # Apply the types to the file config
+        list_of_records_config: list[dict[str, str]] = []
+        for entry in self.config[self.list_of_records_name]:
+            new_entry = {}
+            for k, v in entry.items():
+                ctor = type_map.get(k)
+                if ctor:
+                    new_entry[k] = f"{ctor}({v})"
+                else:
+                    new_entry[k] = v
+            list_of_records_config.append(new_entry)
+
+        # For mainnet, check that the spec config & file config are the same
+        if self.preset_name == "mainnet":
+            assert self.list_of_records == list_of_records_config, \
+                f"list of records mismatch: {self.list_of_records} vs {list_of_records_config}"
+        elif self.preset_name == "minimal":
+            self.list_of_records = list_of_records_config
+
+        # Set the config variable and reset the state
+        self.config_vars[self.list_of_records_name] = self.list_of_records
+        self.list_of_records = None
+
+    def _process_html_block(self, child):
+        """
+        Handles HTML comments for skip logic and list-of-records detection.
+        Sets flags or state variables for the next iteration.
+        """
+
+        body = child.body.strip()
+        if body == "<!-- eth2spec: skip -->":
+            self.should_skip = True
+        # Handle list-of-records tables
+        match = re.match(
+            r"<!--\s*list-of-records:([a-zA-Z0-9_-]+)\s*-->", body)
+        if match:
+            self.list_of_records = []
+            self.list_of_records_name = match.group(1).upper()
+
+    def _build_spec_object(self):
+        """
+        Constructs and returns the SpecObject using all collected data.
+        """
+        return SpecObject(
+            functions=self.functions,
+            protocols=self.protocols,
+            custom_types=self.custom_types,
+            preset_dep_custom_types=self.preset_dep_custom_types,
+            constant_vars=self.constant_vars,
+            preset_dep_constant_vars=self.preset_dep_constant_vars,
+            preset_vars=self.preset_vars,
+            config_vars=self.config_vars,
+            ssz_dep_constants=self.ssz_dep_constants,
+            func_dep_presets=self.func_dep_presets,
+            ssz_objects=self.ssz_objects,
+            dataclasses=self.dataclasses,
+        )
+
+@lru_cache(maxsize=None)
+def _get_name_from_heading(heading: Heading) -> Optional[str]:
+    last_child = heading.children[-1]
+    if isinstance(last_child, CodeSpan):
+        return last_child.children
+    return None
+
+
+@lru_cache(maxsize=None)
+def _get_source_from_code_block(block: FencedCode) -> str:
+    return block.children[0].children.strip()
+
+
+@lru_cache(maxsize=None)
+def _get_function_name_from_source(source: str) -> str:
+    fn = ast.parse(source).body[0]
+    return fn.name
+
+
+@lru_cache(maxsize=None)
+def _get_self_type_from_source(source: str) -> Optional[str]:
+    fn = ast.parse(source).body[0]
+    args = fn.args.args
+    if len(args) == 0:
+        return None
+    if args[0].arg != 'self':
+        return None
+    if args[0].annotation is None:
+        return None
+    return args[0].annotation.id
+
+
+@lru_cache(maxsize=None)
+def _get_class_info_from_source(source: str) -> Tuple[str, Optional[str]]:
+    class_def = ast.parse(source).body[0]
+    base = class_def.bases[0]
+    if isinstance(base, ast.Name):
+        parent_class = base.id
+    elif isinstance(base, ast.Subscript):
+        parent_class = base.value.id
+    else:
+        # NOTE: SSZ definition derives from earlier phase...
+        # e.g. `phase0.SignedBeaconBlock`
+        # TODO: check for consistency with other phases
+        parent_class = None
+    return class_def.name, parent_class
+
+
+@lru_cache(maxsize=None)
+def _is_constant_id(name: str) -> bool:
+    if name[0] not in string.ascii_uppercase + '_':
+        return False
+    return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:]))
+
+@lru_cache(maxsize=None)
+def _load_kzg_trusted_setups(preset_name):
+    trusted_setups_file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/trusted_setup_4096.json'
+
+    with open(trusted_setups_file_path, 'r') as f:
+        json_data = json.load(f)
+        trusted_setup_G1_monomial = json_data['g1_monomial']
+        trusted_setup_G1_lagrange = json_data['g1_lagrange']
+        trusted_setup_G2_monomial = json_data['g2_monomial']
+
+    return trusted_setup_G1_monomial, trusted_setup_G1_lagrange, trusted_setup_G2_monomial
+
+@lru_cache(maxsize=None)
+def _load_curdleproofs_crs(preset_name):
+    """
+    NOTE: File generated from https://github.com/asn-d6/curdleproofs/blob/8e8bf6d4191fb6a844002f75666fb7009716319b/tests/crs.rs#L53-L67
+    """
+    file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/curdleproofs_crs.json'
+
+    with open(file_path, 'r') as f:
+        json_data = json.load(f)
+
+    return json_data
+
+
+ALL_KZG_SETUPS = {
+    'minimal': _load_kzg_trusted_setups('minimal'),
+    'mainnet': _load_kzg_trusted_setups('mainnet')
+}
+
+ALL_CURDLEPROOFS_CRS = {
+    'minimal': _load_curdleproofs_crs('minimal'),
+    'mainnet': _load_curdleproofs_crs('mainnet'),
+}
+
+@lru_cache(maxsize=None)
+def _parse_value(name: str, typed_value: str, type_hint: Optional[str] = None) -> VariableDefinition:
+    comment = None
+    if name in ("ROOT_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_REDUCED"):
+        comment = "noqa: E501"
+
+    typed_value = typed_value.strip()
+    if '(' not in typed_value:
+        return VariableDefinition(type_name=None, value=typed_value, comment=comment, type_hint=type_hint)
+    i = typed_value.index('(')
+    type_name = typed_value[:i]
+
+    return VariableDefinition(type_name=type_name, value=typed_value[i+1:-1], comment=comment, type_hint=type_hint)
+
+
+def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name):
+    comment = "noqa: E501"
+    kzg_setups = ALL_KZG_SETUPS[preset_name]
+    preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'] = VariableDefinition(
+        preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'].value,
+        str(kzg_setups[0]),
+        comment, None
+    )
+    preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'] = VariableDefinition(
+        preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'].value,
+        str(kzg_setups[1]),
+        comment, None
+    )
+    constant_vars['KZG_SETUP_G2_MONOMIAL'] = VariableDefinition(
+        constant_vars['KZG_SETUP_G2_MONOMIAL'].value,
+        str(kzg_setups[2]),
+        comment, None
+    )
+
+
+def _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_constant_vars, preset_name):
+    comment = "noqa: E501"
+    constant_vars['CURDLEPROOFS_CRS'] = VariableDefinition(
+        None,
+        'curdleproofs.CurdleproofsCrs.from_json(json.dumps(' + str(ALL_CURDLEPROOFS_CRS[str(preset_name)]).replace('0x', '') + '))',
+        comment, None
+    )
+
+
+@lru_cache(maxsize=None)
+def parse_markdown(content: str):
+    return gfm.parse(content)
+
+
+def check_yaml_matches_spec(var_name, yaml, value_def):
+    """
+    This function performs a sanity check for presets & configs. To a certain degree, it ensures
+    that the values in the specifications match those in the yaml files.
+    """
+    if var_name == "TERMINAL_BLOCK_HASH":
+        # This is just Hash32() in the specs, that's fine
+        return
+
+    # We use a var in the definition of a new var, replace usages
+    # Reverse sort so that overridden values come first
+    updated_value = value_def.value
+    for var in sorted(yaml.keys(), reverse=True):
+        if var in updated_value:
+            updated_value = updated_value.replace(var, yaml[var])
+    try:
+        assert yaml[var_name] == repr(eval(updated_value)), \
+            f"mismatch for {var_name}: {yaml[var_name]} vs {eval(updated_value)}"
+    except NameError:
+        # Okay it's probably something more serious, let's ignore
+        pass
diff --git a/setup.py b/setup.py
index 3a3b27be75..afefe7b565 100644
--- a/setup.py
+++ b/setup.py
@@ -21,6 +21,7 @@
 from setuptools import setup, find_packages, Command
 from setuptools.command.build_py import build_py
 from typing import Dict, List, Sequence, Optional, Tuple
+from deepdiff import DeepDiff
 
 pysetup_path = os.path.abspath(os.path.dirname(__file__))
 sys.path.insert(0, pysetup_path)
@@ -46,7 +47,7 @@
     SpecObject,
     VariableDefinition,
 )
-
+from pysetup.mk_to_spec import MarkdownToSpec
 
 # Ignore '1.5.0-alpha.*' to '1.5.0a*' messages.
 warnings.filterwarnings('ignore', message='Normalizing .* to .*')
@@ -220,6 +221,8 @@ def check_yaml_matches_spec(var_name, yaml, value_def):
         # Okay it's probably something more serious, let's ignore
         pass
 
+def get_spec_new(file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name=str) -> SpecObject:
+    return MarkdownToSpec(file_name, preset, config, preset_name).run()
 
 def get_spec(file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name=str) -> SpecObject:
     functions: Dict[str, str] = {}
@@ -477,7 +480,10 @@ def build_spec(fork: str,
                config_file: Path) -> str:
     preset = load_preset(tuple(preset_files))
     config = load_config(config_file)
-    all_specs = [get_spec(spec, preset, config, preset_name) for spec in source_files]
+    all_specs = [get_spec_new(spec, preset, config, preset_name) for spec in source_files]
+    all_specs_old = [get_spec(spec, preset, config, preset_name) for spec in source_files]
+    
+    assert DeepDiff(all_specs, all_specs_old, ignore_order=True) == {}, f"specs differ: {DeepDiff(all_specs, all_specs_old, ignore_order=True)}"
 
     spec_object = all_specs[0]
     for value in all_specs[1:]:
@@ -676,3 +682,4 @@ def run(self):
     py_modules=["eth2spec"],
     cmdclass=commands,
 )
+

From f0adad625313ed922457f3a27b6930c8f2a79d2c Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Fri, 16 May 2025 16:13:20 +0000
Subject: [PATCH 02/18] Second checkpoint

---
 pyproject.toml        |   1 +
 pysetup/mk_to_spec.py | 490 +++++++++++++++++++++++++-----------------
 setup.py              |   4 +-
 3 files changed, 296 insertions(+), 199 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 01167d9053..b310ea598b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,6 +4,7 @@ requires = [
   "ruamel.yaml==0.18.10",
   "setuptools==80.4.0",
   "wheel==0.45.1",
+  "deepdiff==8.5.0",
 ]
 
 [project]
diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py
index df97b86e3c..c17689bb4e 100644
--- a/pysetup/mk_to_spec.py
+++ b/pysetup/mk_to_spec.py
@@ -2,12 +2,13 @@
 import json
 from pathlib import Path
 import string
-from typing import Dict, Optional, Tuple
+from typing import Dict, Optional, Tuple, Iterator
 import re
 from functools import lru_cache
 
 
-from marko.block import BlankLine, Heading, FencedCode, HTMLBlock
+from marko.block import BlankLine, Heading, FencedCode, HTMLBlock, Document
+from marko.element import Element
 from marko.ext.gfm import gfm
 from marko.ext.gfm.elements import Table
 from marko.inline import CodeSpan
@@ -16,7 +17,6 @@
 
 class MarkdownToSpec:
     def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name: str):
-        self.file_name = file_name
         self.preset = preset
         self.config = config
         self.preset_name = preset_name
@@ -35,12 +35,8 @@ def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, st
         self.custom_types: Dict[str, str] = {}
         self.preset_dep_custom_types: Dict[str, str] = {}
 
-        self.document = None
-        self.document_iterator = None
-        self.current_name = None
-        self.should_skip = False
-        self.list_of_records = None
-        self.list_of_records_name = None
+        self.document_iterator: Iterator[Element] = self._parse_document(file_name)
+        self.current_heading_name: str | None = None
 
     def run(self) -> SpecObject:
         """
@@ -51,146 +47,135 @@ def run(self) -> SpecObject:
         Returns:
             SpecObject: The constructed specification object.
         """
-        self._parse_document()
-        # self.document_iterator = iter(self.document.children)
-        # while (child := self._get_next_element()) is not None:
-        for child in self.document.children:
+        while (child := self._get_next_element()) is not None:
             self._process_child(child)
         self._finalize_types()
         return self._build_spec_object()
 
-    def _get_next_element(self):
+    def _get_next_element(self) -> Optional[Element]:
         """
         Returns the next element in the document.
         If the end of the document is reached, returns None.
         """
 
         try:
-            # while isinstance(result := next(self.document_iterator), BlankLine):
-            #     pass
-            # return result
-            next(self.document_iterator)
+            while isinstance(result := next(self.document_iterator), BlankLine):
+                pass
+            return result
         except StopIteration:
             return None
 
-    def _finalize_types(self):
+    def _skip_element(self) -> None:
         """
-        Processes all_custom_types into custom_types and preset_dep_custom_types.
-        Calls helper functions to update KZG and CURDLEPROOFS setups if needed.
+        Skips the current element in the document.
+        This is a placeholder for future functionality.
         """
-        # Update KZG trusted setup if needed
-        if any('KZG_SETUP' in name for name in self.constant_vars):
-            _update_constant_vars_with_kzg_setups(
-                self.constant_vars, self.preset_dep_constant_vars, self.preset_name
-            )
-
-        # Update CURDLEPROOFS CRS if needed
-        if any('CURDLEPROOFS_CRS' in name for name in self.constant_vars):
-            _update_constant_vars_with_curdleproofs_crs(
-                self.constant_vars, self.preset_dep_constant_vars, self.preset_name
-            )
+        self._get_next_element()
 
-        # Split all_custom_types into custom_types and preset_dep_custom_types
-        self.custom_types = {}
-        self.preset_dep_custom_types = {}
-        for name, value in self.all_custom_types.items():
-            if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars):
-                self.preset_dep_custom_types[name] = value
-            else:
-                self.custom_types[name] = value
+    
 
-    def _parse_document(self):
+    def _parse_document(self, file_name: Path) -> Iterator[Element]:
         """
         Opens the markdown file, parses its content into a document object using _parse_markdown,
         and stores the parsed document in self.document.
         """
-        with open(self.file_name) as source_file:
-            self.document = parse_markdown(source_file.read())
+        with open(file_name) as source_file:
+            document = parse_markdown(source_file.read())
+            return iter(document.children)
 
-    def _process_child(self, child):
+    def _process_child(self, child: Element):
         # Skip blank lines
         if isinstance(child, BlankLine):
             return
 
-        if self.should_skip:
-            self.should_skip = False
-            return
-
-            # Dispatch to the correct handler
-        if isinstance(child, Heading):
-            self._process_heading(child)
-        elif isinstance(child, FencedCode):
-            self._process_code_block(child)
-        elif isinstance(child, Table):
-            # Handler for list-of-records is managed by state in _process_html_block
-            if self.list_of_records is not None:
-                self._process_list_of_records_table(child)
-            else:
+        # Dispatch to the correct handler
+        match child:
+            case Heading():
+                self._process_heading(child)
+            case FencedCode():
+                self._process_code_block(child)
+            case Table():
                 self._process_table(child)
-        elif isinstance(child, HTMLBlock):
-            self._process_html_block(child)
+            case HTMLBlock():
+                self._process_html_block(child)
 
-    def _process_heading(self, child):
+    def _process_heading(self, heading: Heading):
         """
         Extracts the section name from the heading and updates current_name for context.
         """
-        if not isinstance(child, Heading):
+        if not isinstance(heading, Heading):
             return
-        self.current_name = _get_name_from_heading(child)
+        self.current_heading_name = _get_name_from_heading(heading)
         # else: skip unknown types
 
-    def _process_code_block(self, child):
+    def _process_code_block(self, code_block: FencedCode):
         """
         Processes a FencedCode block:
         - Checks if the code block is Python.
         - Extracts source code and determines if it is a function, dataclass, or class.
         - Updates the appropriate dictionary (functions, protocols, dataclasses, ssz_objects).
         """
-        if child.lang != "python":
+        if code_block.lang != "python":
             return
 
-        source = _get_source_from_code_block(child)
+        source = _get_source_from_code_block(code_block)
+        module = ast.parse(source)
 
-        if source.startswith("def"):
-            self._process_code_def(source)
-        elif source.startswith("@dataclass"):
-            self._process_code_dataclass(source)
-        elif source.startswith("class"):
-            self._process_code_class(source)
+        clean_source = "\n".join(line.rstrip() for line in source.splitlines())
+        # AST container of the first definition in the block
+        first_def = module.body[0] 
+
+        if isinstance(first_def, ast.FunctionDef):
+            self._process_code_def(clean_source, first_def)
+        elif isinstance(first_def, ast.ClassDef) and _has_decorator(first_def, "dataclass"):
+            self._add_dataclass(clean_source, first_def)
+        elif isinstance(first_def, ast.ClassDef):
+            self._process_code_class(clean_source, first_def)
         else:
             raise Exception("unrecognized python code element: " + source)
 
-    def _process_code_def(self, source):
-        self.current_name = _get_function_name_from_source(source)
-        self_type_name = _get_self_type_from_source(source)
-        function_def = "\n".join(line.rstrip() for line in source.splitlines())
+    def _process_code_def(self, source: str, fn: ast.FunctionDef):
+        """
+        Processes a function definition node from the AST and stores its source code representation.
+        If the function is a method (i.e., has a self type), it is added to the protocol functions for that type.
+        Otherwise, it is stored as a standalone function.
+        Args:
+            source (str): The source code of the function definition.
+            fn (ast.FunctionDef): The AST node representing the function definition.
+        """
+
+        self_type_name = _get_self_type_from_source(fn)
+        
         if self_type_name is None:
-            self.functions[self.current_name] = function_def
+            self.functions[fn.name] = source
         else:
-            if self_type_name not in self.protocols:
-                self.protocols[self_type_name] = ProtocolDefinition(
-                    functions={})
-            self.protocols[self_type_name].functions[self.current_name] = function_def
-
-    def _process_code_dataclass(self, source):
-        """ if self.current_name is None:
-            raise Exception(f"found @dataclass without a name: {source}")"""
-        self.dataclasses[self.current_name] = "\n".join(
-            line.rstrip() for line in source.splitlines())
-
-    def _process_code_class(self, source):
-        class_name, parent_class = _get_class_info_from_source(source)
+            self._add_protocol_function(self_type_name, fn.name, source)
+
+    def _add_protocol_function(self, protocol_name: str, function_name: str, function_def: str):
+        """
+        Adds a function definition to the protocol functions dictionary.
+        """
+
+        if protocol_name not in self.protocols:
+            self.protocols[protocol_name] = ProtocolDefinition(
+                functions={})
+        self.protocols[protocol_name].functions[function_name] = function_def
+
+    def _add_dataclass(self, source, cls: ast.ClassDef):
+        self.dataclasses[cls.name] = source
+
+    def _process_code_class(self, source, cls: ast.ClassDef):
+        class_name, parent_class = _get_class_info_from_ast(cls)
+
         # check consistency with spec
-        if class_name != self.current_name:
-            raise Exception(
-                f"class_name {class_name} != current_name {self.current_name}")
+        if class_name != self.current_heading_name:
+            raise Exception(f"class_name {class_name} != current_name {self.current_heading_name}")
 
         if parent_class:
             assert parent_class == "Container"
-        self.ssz_objects[self.current_name] = "\n".join(
-            line.rstrip() for line in source.splitlines())
+        self.ssz_objects[class_name] = source
 
-    def _process_table(self, child):
+    def _process_table(self, child: HTMLBlock):
         """
         Handles standard tables (not list-of-records).
         Iterates over rows, extracting variable names, values, and descriptions.
@@ -200,103 +185,170 @@ def _process_table(self, child):
         """
 
         for row in child.children:
-            cells = row.children
-            if len(cells) >= 2:
-                name_cell = cells[0]
-                name = name_cell.children[0].children
-
-                value_cell = cells[1]
-                value = value_cell.children[0].children
-
-                description = None
-                if len(cells) >= 3:
-                    description_cell = cells[2]
-                    if len(description_cell.children) > 0:
-                        description = description_cell.children[0].children
-                        if isinstance(description, list):
-                            description = description[0].children
-
-                if isinstance(name, list):
-                    name = name[0].children
-                if isinstance(value, list):
-                    value = value[0].children
-
-                # Skip types that have been defined elsewhere
-                if description is not None and description.startswith("<!-- predefined-type -->"):
-                    continue
-
-                if not _is_constant_id(name):
-                    # Check for short type declarations
-                    if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")):
-                        self.all_custom_types[name] = value
-                    continue
-
-                if value.startswith("get_generalized_index"):
-                    self.ssz_dep_constants[name] = value
-                    continue
-
-                if description is not None and description.startswith("<!-- predefined -->"):
-                    self.func_dep_presets[name] = value
-
-                value_def = _parse_value(name, value)
-                if name in self.preset:
-                    if self.preset_name == "mainnet":
-                        check_yaml_matches_spec(
-                            name, self.preset, value_def)
-                    self.preset_vars[name] = VariableDefinition(
-                        value_def.type_name, self.preset[name], value_def.comment, None)
-                elif name in self.config:
-                    if self.preset_name == "mainnet":
-                        check_yaml_matches_spec(
-                            name, self.config, value_def)
-                    self.config_vars[name] = VariableDefinition(
-                        value_def.type_name, self.config[name], value_def.comment, None)
+            if len(row.children) < 2:
+                continue
+
+            name, value, description = self._get_table_row_fields(row)
+
+            # Skip types that have been defined elsewhere
+            if description is not None and description.startswith("<!-- predefined-type -->"):
+                continue
+
+            # If it is not a constant, check if it is a custom type
+            if not _is_constant_id(name):
+                # Check for short type declarations
+                if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")):
+                    self.all_custom_types[name] = value
+                continue
+
+            # It is a constant name and a generalized index
+            if value.startswith("get_generalized_index"):
+                self.ssz_dep_constants[name] = value
+                continue
+
+            # It is a constant and not a generalized index, and a function-dependent preset
+            if description is not None and description.startswith("<!-- predefined -->"):
+                self.func_dep_presets[name] = value
+
+            # It is a constant and not a generalized index
+            value_def = _parse_value(name, value)
+            # It is a preset
+            if name in self.preset:
+                if self.preset_name == "mainnet":
+                    check_yaml_matches_spec(name, self.preset, value_def)
+
+                self.preset_vars[name] = VariableDefinition(value_def.type_name, self.preset[name], value_def.comment, None)
+
+            # It is a config variable
+            elif name in self.config:
+                if self.preset_name == "mainnet":
+                    check_yaml_matches_spec(name, self.config, value_def)
+
+                self.config_vars[name] = VariableDefinition(value_def.type_name, self.config[name], value_def.comment, None)
+
+            # It is a constant variable or a preset_dep_constant_vars
+            else:
+                if name in ('ENDIANNESS', 'KZG_ENDIANNESS'):
+                    # Deal with mypy Literal typing check
+                    value_def = _parse_value(name, value, type_hint='Final')
+                if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars):
+                    self.preset_dep_constant_vars[name] = value_def
                 else:
-                    if name in ('ENDIANNESS', 'KZG_ENDIANNESS'):
-                        # Deal with mypy Literal typing check
-                        value_def = _parse_value(
-                            name, value, type_hint='Final')
-                    if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars):
-                        self.preset_dep_constant_vars[name] = value_def
-                    else:
-                        self.constant_vars[name] = value_def
+                    self.constant_vars[name] = value_def
+
+    @staticmethod
+    def _get_table_row_fields(row: Element) -> tuple[str, str, Optional[str]]:
+        """
+        Extracts the name, value, and description fields from a table row element.
+        Description can be None.
+        """
+        cells = row.children
+        name_cell = cells[0]
+        name = name_cell.children[0].children
+
+        value_cell = cells[1]
+        value = value_cell.children[0].children
+
+        if isinstance(name, list):
+            name = name[0].children
+        if isinstance(value, list):
+            value = value[0].children
+
+        description = None
+        if len(cells) >= 3:
+            description_cell = cells[2]
+            if len(description_cell.children) > 0:
+                description = description_cell.children[0].children
+                if isinstance(description, list):
+                    description = description[0].children
 
-    def _process_list_of_records_table(self, child):
+        return name, value, description
+
+    def _process_list_of_records_table(self, child, list_of_records_name):
         """
         Handles tables marked as 'list-of-records'.
         Extracts headers and rows, mapping field names and types.
         Applies type mapping to config entries.
         Validates or updates the config variable as needed based on preset_name.
         Updates config_vars with the processed list.
-        """
 
-        list_of_records_header = None
-        for i, row in enumerate(child.children):
-            if i == 0:
-                # Save the table header, used for field names (skip last item: description)
-                list_of_records_header = [
-                    re.sub(r'\s+', '_', value.children[0].children.upper())
-                    for value in row.children[:-1]
-                ]
-            else:
-                # Add the row entry to our list of records
-                self.list_of_records.append({
-                    list_of_records_header[j]: value.children[0].children
-                    for j, value in enumerate(row.children[:-1])
-                })
+        Example of input:
+            | Epoch                       | Max Blobs Per Block | Description                      |
+            | --------------------------- | ------------------- | -------------------------------- |
+            | `Epoch(269568)` **Deneb**   | `uint64(6)`         | The limit is set to `6` blobs    |
+            | `Epoch(364032)` **Electra** | `uint64(9)`         | The limit is raised to `9` blobs |
+
+        The method _process_html_block calls this method when it encounters a comment
+        of the form `<!-- list-of-records:name -->`.
+        """
+        list_of_records_spec = self._extract_list_of_records_spec(child)
 
         # Make a type map from the spec definition
+        type_map = self._make_list_of_records_type_map(list_of_records_spec)
+
+        # Apply the types to the file config
+        list_of_records_config_file = self._extract_typed_records_config(
+            list_of_records_name, type_map
+        )
+
+        # For mainnet, check that the spec config & file config are the same
+        # For minimal, we expect this to be different; just use the file config
+        if self.preset_name == "mainnet":
+            assert list_of_records_spec == list_of_records_config_file, \
+                f"list of records mismatch: {list_of_records_spec} vs {list_of_records_config_file}"
+
+        # Set the config variable
+        self.config_vars[list_of_records_name] = list_of_records_config_file
+
+    @staticmethod
+    def _make_list_of_records_type_map(list_of_records: list[dict[str, str]]) -> dict[str, str]:
+        """
+        Given a list of records (each a dict of field name to value), extract a mapping
+        from field name to type name, based on values of the form 'TypeName(...)'.
+        """
         type_map: dict[str, str] = {}
         pattern = re.compile(r'^(\w+)\(.*\)$')
-        for entry in self.list_of_records:
+        for entry in list_of_records:
             for k, v in entry.items():
                 m = pattern.match(v)
                 if m:
                     type_map[k] = m.group(1)
+        return type_map
 
-        # Apply the types to the file config
-        list_of_records_config: list[dict[str, str]] = []
-        for entry in self.config[self.list_of_records_name]:
+    @staticmethod
+    def _extract_list_of_records_spec(child) -> list[dict[str, str]]:
+        """
+        Extracts the list of records from a table element.
+        Returns a list of dicts, each representing a row with field names as keys.
+        """
+
+        # Save the table header, used for field names (skip last item: description)
+        header_row = child.children[0]
+        list_of_records_spec_header = [
+            re.sub(r'\s+', '_', value.children[0].children.upper())
+            for value in header_row.children[:-1]
+        ]
+
+        # Process the remaining rows
+        list_of_records_spec: list[dict[str, str]] = [
+            {
+                list_of_records_spec_header[j]: value.children[0].children
+                for j, value in enumerate(row.children[:-1])
+            }
+            for row in child.children[1:]
+        ]
+
+        return list_of_records_spec
+
+    def _extract_typed_records_config(
+        self, list_of_records_name: str, type_map: dict[str, str]
+    ) -> list[dict[str, str]]:
+        """
+        Applies type constructors to config entries based on the type map.
+        Returns a new list of dicts with types applied.
+        """
+        list_of_records_config_file: list[dict[str, str]] = []
+        for entry in self.config[list_of_records_name]:
             new_entry = {}
             for k, v in entry.items():
                 ctor = type_map.get(k)
@@ -304,18 +356,8 @@ def _process_list_of_records_table(self, child):
                     new_entry[k] = f"{ctor}({v})"
                 else:
                     new_entry[k] = v
-            list_of_records_config.append(new_entry)
-
-        # For mainnet, check that the spec config & file config are the same
-        if self.preset_name == "mainnet":
-            assert self.list_of_records == list_of_records_config, \
-                f"list of records mismatch: {self.list_of_records} vs {list_of_records_config}"
-        elif self.preset_name == "minimal":
-            self.list_of_records = list_of_records_config
-
-        # Set the config variable and reset the state
-        self.config_vars[self.list_of_records_name] = self.list_of_records
-        self.list_of_records = None
+            list_of_records_config_file.append(new_entry)
+        return list_of_records_config_file
 
     def _process_html_block(self, child):
         """
@@ -324,14 +366,48 @@ def _process_html_block(self, child):
         """
 
         body = child.body.strip()
+
+        # This comment marks that we should skip the next element
         if body == "<!-- eth2spec: skip -->":
-            self.should_skip = True
+            self._skip_element()
+
         # Handle list-of-records tables
+        # This comment marks that the next table is a list-of-records
+        # e.g. <!-- list-of-records: <name> -->
         match = re.match(
             r"<!--\s*list-of-records:([a-zA-Z0-9_-]+)\s*-->", body)
         if match:
-            self.list_of_records = []
-            self.list_of_records_name = match.group(1).upper()
+            table_element = self._get_next_element()
+            if not isinstance(table_element, Table):
+                raise Exception(
+                    f"expected table after list-of-records comment, got {type(table_element)}")
+            self._process_list_of_records_table(table_element, match.group(1).upper())
+
+    def _finalize_types(self):
+        """
+        Processes all_custom_types into custom_types and preset_dep_custom_types.
+        Calls helper functions to update KZG and CURDLEPROOFS setups if needed.
+        """
+        # Update KZG trusted setup if needed
+        if any('KZG_SETUP' in name for name in self.constant_vars):
+            _update_constant_vars_with_kzg_setups(
+                self.constant_vars, self.preset_dep_constant_vars, self.preset_name
+            )
+
+        # Update CURDLEPROOFS CRS if needed
+        if any('CURDLEPROOFS_CRS' in name for name in self.constant_vars):
+            _update_constant_vars_with_curdleproofs_crs(
+                self.constant_vars, self.preset_dep_constant_vars, self.preset_name
+            )
+
+        # Split all_custom_types into custom_types and preset_dep_custom_types
+        self.custom_types = {}
+        self.preset_dep_custom_types = {}
+        for name, value in self.all_custom_types.items():
+            if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars):
+                self.preset_dep_custom_types[name] = value
+            else:
+                self.custom_types[name] = value
 
     def _build_spec_object(self):
         """
@@ -368,12 +444,13 @@ def _get_source_from_code_block(block: FencedCode) -> str:
 @lru_cache(maxsize=None)
 def _get_function_name_from_source(source: str) -> str:
     fn = ast.parse(source).body[0]
+    if not isinstance(fn, ast.FunctionDef):
+        raise Exception("expected function definition")
     return fn.name
 
 
 @lru_cache(maxsize=None)
-def _get_self_type_from_source(source: str) -> Optional[str]:
-    fn = ast.parse(source).body[0]
+def _get_self_type_from_source(fn: ast.FunctionDef) -> Optional[str]:
     args = fn.args.args
     if len(args) == 0:
         return None
@@ -385,9 +462,8 @@ def _get_self_type_from_source(source: str) -> Optional[str]:
 
 
 @lru_cache(maxsize=None)
-def _get_class_info_from_source(source: str) -> Tuple[str, Optional[str]]:
-    class_def = ast.parse(source).body[0]
-    base = class_def.bases[0]
+def _get_class_info_from_ast(cls: ast.ClassDef) -> Tuple[str, Optional[str]]:
+    base = cls.bases[0]
     if isinstance(base, ast.Name):
         parent_class = base.id
     elif isinstance(base, ast.Subscript):
@@ -397,11 +473,22 @@ def _get_class_info_from_source(source: str) -> Tuple[str, Optional[str]]:
         # e.g. `phase0.SignedBeaconBlock`
         # TODO: check for consistency with other phases
         parent_class = None
-    return class_def.name, parent_class
+    return cls.name, parent_class
 
 
 @lru_cache(maxsize=None)
 def _is_constant_id(name: str) -> bool:
+    """
+    Check if the given name follows the convention for constant identifiers.
+    A valid constant identifier must:
+    - Start with an uppercase ASCII letter or an underscore ('_').
+    - All subsequent characters (if any) must be uppercase ASCII letters, underscores, or digits.
+    Args:
+        name (str): The identifier name to check.
+    Returns:
+        bool: True if the name is a valid constant identifier, False otherwise.
+    """
+
     if name[0] not in string.ascii_uppercase + '_':
         return False
     return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:]))
@@ -486,7 +573,7 @@ def _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_consta
 
 
 @lru_cache(maxsize=None)
-def parse_markdown(content: str):
+def parse_markdown(content: str) -> Document:
     return gfm.parse(content)
 
 
@@ -511,3 +598,12 @@ def check_yaml_matches_spec(var_name, yaml, value_def):
     except NameError:
         # Okay it's probably something more serious, let's ignore
         pass
+
+def _has_decorator(decorateable: ast.expr, name: str) -> bool:
+    return any(_is_decorator(d, name) for d in decorateable.decorator_list)
+
+def _is_decorator(decorator: ast.expr, name: str) -> bool:
+    return (isinstance(decorator, ast.Name) and decorator.id == name) or \
+            (isinstance(decorator, ast.Attribute) and decorator.attr == name) or \
+            (isinstance(decorator, ast.Call) and decorator.func.id == name) or \
+            (isinstance(decorator, ast.Subscript) and decorator.value.id == name)
diff --git a/setup.py b/setup.py
index afefe7b565..221d20f8bc 100644
--- a/setup.py
+++ b/setup.py
@@ -267,7 +267,7 @@ def get_spec(file_name: Path, preset: Dict[str, str], config: Dict[str, str], pr
                         protocols[self_type_name] = ProtocolDefinition(functions={})
                     protocols[self_type_name].functions[current_name] = function_def
             elif source.startswith("@dataclass"):
-                dataclasses[current_name] = "\n".join(line.rstrip() for line in source.splitlines())
+                dataclasses[ast.parse(source).body[0].name] = "\n".join(line.rstrip() for line in source.splitlines())
             elif source.startswith("class"):
                 class_name, parent_class = _get_class_info_from_source(source)
                 # check consistency with spec
@@ -482,7 +482,7 @@ def build_spec(fork: str,
     config = load_config(config_file)
     all_specs = [get_spec_new(spec, preset, config, preset_name) for spec in source_files]
     all_specs_old = [get_spec(spec, preset, config, preset_name) for spec in source_files]
-    
+
     assert DeepDiff(all_specs, all_specs_old, ignore_order=True) == {}, f"specs differ: {DeepDiff(all_specs, all_specs_old, ignore_order=True)}"
 
     spec_object = all_specs[0]

From 7afaf9f23bdb56000b117692e96e9d1069e738b7 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Sun, 18 May 2025 10:22:13 +0000
Subject: [PATCH 03/18] Third checkpoint

---
 pysetup/mk_to_spec.py | 110 +++++++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py
index c17689bb4e..feb914f5da 100644
--- a/pysetup/mk_to_spec.py
+++ b/pysetup/mk_to_spec.py
@@ -21,19 +21,23 @@ def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, st
         self.config = config
         self.preset_name = preset_name
 
-        self.functions: Dict[str, str] = {}
-        self.protocols: Dict[str, ProtocolDefinition] = {}
-        self.constant_vars: Dict[str, VariableDefinition] = {}
-        self.preset_dep_constant_vars: Dict[str, VariableDefinition] = {}
-        self.preset_vars: Dict[str, VariableDefinition] = {}
-        self.config_vars: Dict[str, VariableDefinition] = {}
-        self.ssz_dep_constants: Dict[str, str] = {}
-        self.func_dep_presets: Dict[str, str] = {}
-        self.ssz_objects: Dict[str, str] = {}
-        self.dataclasses: Dict[str, str] = {}
+        # Use a single dict to hold all SpecObject fields
+        self.spec = {
+            "functions": {},
+            "protocols": {},
+            "custom_types": {},
+            "preset_dep_custom_types": {},
+            "constant_vars": {},
+            "preset_dep_constant_vars": {},
+            "preset_vars": {},
+            "config_vars": {},
+            "ssz_dep_constants": {},
+            "func_dep_presets": {},
+            "ssz_objects": {},
+            "dataclasses": {},
+        }
+
         self.all_custom_types: Dict[str, str] = {}
-        self.custom_types: Dict[str, str] = {}
-        self.preset_dep_custom_types: Dict[str, str] = {}
 
         self.document_iterator: Iterator[Element] = self._parse_document(file_name)
         self.current_heading_name: str | None = None
@@ -68,7 +72,6 @@ def _get_next_element(self) -> Optional[Element]:
     def _skip_element(self) -> None:
         """
         Skips the current element in the document.
-        This is a placeholder for future functionality.
         """
         self._get_next_element()
 
@@ -101,12 +104,9 @@ def _process_child(self, child: Element):
 
     def _process_heading(self, heading: Heading):
         """
-        Extracts the section name from the heading and updates current_name for context.
+        Extracts the section name from the heading and updates current_heading_name for context.
         """
-        if not isinstance(heading, Heading):
-            return
         self.current_heading_name = _get_name_from_heading(heading)
-        # else: skip unknown types
 
     def _process_code_block(self, code_block: FencedCode):
         """
@@ -119,9 +119,9 @@ def _process_code_block(self, code_block: FencedCode):
             return
 
         source = _get_source_from_code_block(code_block)
-        module = ast.parse(source)
-
         clean_source = "\n".join(line.rstrip() for line in source.splitlines())
+
+        module = ast.parse(source)
         # AST container of the first definition in the block
         first_def = module.body[0] 
 
@@ -147,7 +147,7 @@ def _process_code_def(self, source: str, fn: ast.FunctionDef):
         self_type_name = _get_self_type_from_source(fn)
         
         if self_type_name is None:
-            self.functions[fn.name] = source
+            self.spec["functions"][fn.name] = source
         else:
             self._add_protocol_function(self_type_name, fn.name, source)
 
@@ -156,13 +156,13 @@ def _add_protocol_function(self, protocol_name: str, function_name: str, functio
         Adds a function definition to the protocol functions dictionary.
         """
 
-        if protocol_name not in self.protocols:
-            self.protocols[protocol_name] = ProtocolDefinition(
+        if protocol_name not in self.spec["protocols"]:
+            self.spec["protocols"][protocol_name] = ProtocolDefinition(
                 functions={})
-        self.protocols[protocol_name].functions[function_name] = function_def
+        self.spec["protocols"][protocol_name].functions[function_name] = function_def
 
     def _add_dataclass(self, source, cls: ast.ClassDef):
-        self.dataclasses[cls.name] = source
+        self.spec["dataclasses"][cls.name] = source
 
     def _process_code_class(self, source, cls: ast.ClassDef):
         class_name, parent_class = _get_class_info_from_ast(cls)
@@ -173,7 +173,7 @@ def _process_code_class(self, source, cls: ast.ClassDef):
 
         if parent_class:
             assert parent_class == "Container"
-        self.ssz_objects[class_name] = source
+        self.spec["ssz_objects"][class_name] = source
 
     def _process_table(self, child: HTMLBlock):
         """
@@ -203,12 +203,12 @@ def _process_table(self, child: HTMLBlock):
 
             # It is a constant name and a generalized index
             if value.startswith("get_generalized_index"):
-                self.ssz_dep_constants[name] = value
+                self.spec["ssz_dep_constants"][name] = value
                 continue
 
             # It is a constant and not a generalized index, and a function-dependent preset
             if description is not None and description.startswith("<!-- predefined -->"):
-                self.func_dep_presets[name] = value
+                self.spec["func_dep_presets"][name] = value
 
             # It is a constant and not a generalized index
             value_def = _parse_value(name, value)
@@ -217,24 +217,24 @@ def _process_table(self, child: HTMLBlock):
                 if self.preset_name == "mainnet":
                     check_yaml_matches_spec(name, self.preset, value_def)
 
-                self.preset_vars[name] = VariableDefinition(value_def.type_name, self.preset[name], value_def.comment, None)
+                self.spec["preset_vars"][name] = VariableDefinition(value_def.type_name, self.preset[name], value_def.comment, None)
 
             # It is a config variable
             elif name in self.config:
                 if self.preset_name == "mainnet":
                     check_yaml_matches_spec(name, self.config, value_def)
 
-                self.config_vars[name] = VariableDefinition(value_def.type_name, self.config[name], value_def.comment, None)
+                self.spec["config_vars"][name] = VariableDefinition(value_def.type_name, self.config[name], value_def.comment, None)
 
             # It is a constant variable or a preset_dep_constant_vars
             else:
                 if name in ('ENDIANNESS', 'KZG_ENDIANNESS'):
                     # Deal with mypy Literal typing check
                     value_def = _parse_value(name, value, type_hint='Final')
-                if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars):
-                    self.preset_dep_constant_vars[name] = value_def
+                if any(k in value for k in self.preset) or any(k in value for k in self.spec["preset_dep_constant_vars"]):
+                    self.spec["preset_dep_constant_vars"][name] = value_def
                 else:
-                    self.constant_vars[name] = value_def
+                    self.spec["constant_vars"][name] = value_def
 
     @staticmethod
     def _get_table_row_fields(row: Element) -> tuple[str, str, Optional[str]]:
@@ -298,7 +298,7 @@ def _process_list_of_records_table(self, child, list_of_records_name):
                 f"list of records mismatch: {list_of_records_spec} vs {list_of_records_config_file}"
 
         # Set the config variable
-        self.config_vars[list_of_records_name] = list_of_records_config_file
+        self.spec["config_vars"][list_of_records_name] = list_of_records_config_file
 
     @staticmethod
     def _make_list_of_records_type_map(list_of_records: list[dict[str, str]]) -> dict[str, str]:
@@ -389,43 +389,43 @@ def _finalize_types(self):
         Calls helper functions to update KZG and CURDLEPROOFS setups if needed.
         """
         # Update KZG trusted setup if needed
-        if any('KZG_SETUP' in name for name in self.constant_vars):
+        if any('KZG_SETUP' in name for name in self.spec["constant_vars"]):
             _update_constant_vars_with_kzg_setups(
-                self.constant_vars, self.preset_dep_constant_vars, self.preset_name
+                self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name
             )
 
         # Update CURDLEPROOFS CRS if needed
-        if any('CURDLEPROOFS_CRS' in name for name in self.constant_vars):
+        if any('CURDLEPROOFS_CRS' in name for name in self.spec["constant_vars"]):
             _update_constant_vars_with_curdleproofs_crs(
-                self.constant_vars, self.preset_dep_constant_vars, self.preset_name
+                self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name
             )
 
         # Split all_custom_types into custom_types and preset_dep_custom_types
-        self.custom_types = {}
-        self.preset_dep_custom_types = {}
+        self.spec["custom_types"] = {}
+        self.spec["preset_dep_custom_types"] = {}
         for name, value in self.all_custom_types.items():
-            if any(k in value for k in self.preset) or any(k in value for k in self.preset_dep_constant_vars):
-                self.preset_dep_custom_types[name] = value
+            if any(k in value for k in self.preset) or any(k in value for k in self.spec["preset_dep_constant_vars"]):
+                self.spec["preset_dep_custom_types"][name] = value
             else:
-                self.custom_types[name] = value
+                self.spec["custom_types"][name] = value
 
     def _build_spec_object(self):
         """
-        Constructs and returns the SpecObject using all collected data.
+        Returns the SpecObject using all collected data.
         """
         return SpecObject(
-            functions=self.functions,
-            protocols=self.protocols,
-            custom_types=self.custom_types,
-            preset_dep_custom_types=self.preset_dep_custom_types,
-            constant_vars=self.constant_vars,
-            preset_dep_constant_vars=self.preset_dep_constant_vars,
-            preset_vars=self.preset_vars,
-            config_vars=self.config_vars,
-            ssz_dep_constants=self.ssz_dep_constants,
-            func_dep_presets=self.func_dep_presets,
-            ssz_objects=self.ssz_objects,
-            dataclasses=self.dataclasses,
+            functions=self.spec["functions"],
+            protocols=self.spec["protocols"],
+            custom_types=self.spec["custom_types"],
+            preset_dep_custom_types=self.spec["preset_dep_custom_types"],
+            constant_vars=self.spec["constant_vars"],
+            preset_dep_constant_vars=self.spec["preset_dep_constant_vars"],
+            preset_vars=self.spec["preset_vars"],
+            config_vars=self.spec["config_vars"],
+            ssz_dep_constants=self.spec["ssz_dep_constants"],
+            func_dep_presets=self.spec["func_dep_presets"],
+            ssz_objects=self.spec["ssz_objects"],
+            dataclasses=self.spec["dataclasses"],
         )
 
 @lru_cache(maxsize=None)

From ac7a1dc7491483632aaafae7044b022e6f8e87b4 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Tue, 20 May 2025 11:44:11 +0000
Subject: [PATCH 04/18] Add tests

---
 Makefile                       |   2 +-
 pysetup/mk_to_spec.py          |  39 ++--
 tests/infra/test_mk_to_spec.py | 333 +++++++++++++++++++++++++++++++++
 3 files changed, 360 insertions(+), 14 deletions(-)
 create mode 100644 tests/infra/test_mk_to_spec.py

diff --git a/Makefile b/Makefile
index 25be7bd587..ed9a5270c1 100644
--- a/Makefile
+++ b/Makefile
@@ -117,7 +117,7 @@ test: pyspec
 		$(PRESET) \
 		$(BLS) \
 		--junitxml=$(TEST_REPORT_DIR)/test_results.xml \
-		$(PYSPEC_DIR)/eth2spec
+		$(CURDIR)/tests/infra $(PYSPEC_DIR)/eth2spec
 
 ###############################################################################
 # Coverage
diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py
index feb914f5da..fea0b90a2f 100644
--- a/pysetup/mk_to_spec.py
+++ b/pysetup/mk_to_spec.py
@@ -10,7 +10,7 @@
 from marko.block import BlankLine, Heading, FencedCode, HTMLBlock, Document
 from marko.element import Element
 from marko.ext.gfm import gfm
-from marko.ext.gfm.elements import Table
+from marko.ext.gfm.elements import Table, TableRow, TableCell
 from marko.inline import CodeSpan
 
 from .typing import ProtocolDefinition, VariableDefinition, SpecObject
@@ -86,7 +86,7 @@ def _parse_document(self, file_name: Path) -> Iterator[Element]:
             document = parse_markdown(source_file.read())
             return iter(document.children)
 
-    def _process_child(self, child: Element):
+    def _process_child(self, child: Element) -> None:
         # Skip blank lines
         if isinstance(child, BlankLine):
             return
@@ -102,13 +102,13 @@ def _process_child(self, child: Element):
             case HTMLBlock():
                 self._process_html_block(child)
 
-    def _process_heading(self, heading: Heading):
+    def _process_heading(self, heading: Heading) -> None:
         """
         Extracts the section name from the heading and updates current_heading_name for context.
         """
         self.current_heading_name = _get_name_from_heading(heading)
 
-    def _process_code_block(self, code_block: FencedCode):
+    def _process_code_block(self, code_block: FencedCode) -> None:
         """
         Processes a FencedCode block:
         - Checks if the code block is Python.
@@ -134,7 +134,7 @@ def _process_code_block(self, code_block: FencedCode):
         else:
             raise Exception("unrecognized python code element: " + source)
 
-    def _process_code_def(self, source: str, fn: ast.FunctionDef):
+    def _process_code_def(self, source: str, fn: ast.FunctionDef) -> None:
         """
         Processes a function definition node from the AST and stores its source code representation.
         If the function is a method (i.e., has a self type), it is added to the protocol functions for that type.
@@ -151,7 +151,7 @@ def _process_code_def(self, source: str, fn: ast.FunctionDef):
         else:
             self._add_protocol_function(self_type_name, fn.name, source)
 
-    def _add_protocol_function(self, protocol_name: str, function_name: str, function_def: str):
+    def _add_protocol_function(self, protocol_name: str, function_name: str, function_def: str) -> None:
         """
         Adds a function definition to the protocol functions dictionary.
         """
@@ -161,10 +161,23 @@ def _add_protocol_function(self, protocol_name: str, function_name: str, functio
                 functions={})
         self.spec["protocols"][protocol_name].functions[function_name] = function_def
 
-    def _add_dataclass(self, source, cls: ast.ClassDef):
+    def _add_dataclass(self, source, cls: ast.ClassDef) -> None:
         self.spec["dataclasses"][cls.name] = source
 
-    def _process_code_class(self, source, cls: ast.ClassDef):
+    def _process_code_class(self, source, cls: ast.ClassDef) -> None:
+        """
+        Processes an AST class definition node, validates its consistency with the current heading,
+        and updates the spec dictionary with the class source code.
+        Args:
+            source (str): The source code of the class.
+            cls (ast.ClassDef): The AST node representing the class definition.
+        Raises:
+            Exception: If the class name does not match the current heading name.
+            AssertionError: If the parent class is not 'Container' when a parent class is present.
+        Side Effects:
+            Updates self.spec["ssz_objects"] with the class source code, keyed by class name.
+        """
+
         class_name, parent_class = _get_class_info_from_ast(cls)
 
         # check consistency with spec
@@ -175,7 +188,7 @@ def _process_code_class(self, source, cls: ast.ClassDef):
             assert parent_class == "Container"
         self.spec["ssz_objects"][class_name] = source
 
-    def _process_table(self, child: HTMLBlock):
+    def _process_table(self, table: Table):
         """
         Handles standard tables (not list-of-records).
         Iterates over rows, extracting variable names, values, and descriptions.
@@ -184,7 +197,7 @@ def _process_table(self, child: HTMLBlock):
         Handles special cases for predefined types and function-dependent presets.
         """
 
-        for row in child.children:
+        for row in table.children:
             if len(row.children) < 2:
                 continue
 
@@ -210,7 +223,7 @@ def _process_table(self, child: HTMLBlock):
             if description is not None and description.startswith("<!-- predefined -->"):
                 self.spec["func_dep_presets"][name] = value
 
-            # It is a constant and not a generalized index
+            # It is a constant and not a generalized index, and not a function-dependent preset
             value_def = _parse_value(name, value)
             # It is a preset
             if name in self.preset:
@@ -237,7 +250,7 @@ def _process_table(self, child: HTMLBlock):
                     self.spec["constant_vars"][name] = value_def
 
     @staticmethod
-    def _get_table_row_fields(row: Element) -> tuple[str, str, Optional[str]]:
+    def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]:
         """
         Extracts the name, value, and description fields from a table row element.
         Description can be None.
@@ -599,7 +612,7 @@ def check_yaml_matches_spec(var_name, yaml, value_def):
         # Okay it's probably something more serious, let's ignore
         pass
 
-def _has_decorator(decorateable: ast.expr, name: str) -> bool:
+def _has_decorator(decorateable: ast.ClassDef | ast.FunctionDef, name: str) -> bool:
     return any(_is_decorator(d, name) for d in decorateable.decorator_list)
 
 def _is_decorator(decorator: ast.expr, name: str) -> bool:
diff --git a/tests/infra/test_mk_to_spec.py b/tests/infra/test_mk_to_spec.py
new file mode 100644
index 0000000000..829c8ff155
--- /dev/null
+++ b/tests/infra/test_mk_to_spec.py
@@ -0,0 +1,333 @@
+import pytest
+from pathlib import Path
+from pysetup.mk_to_spec import MarkdownToSpec
+
+
+@pytest.fixture
+def dummy_preset():
+    return {"EXAMPLE": "1"}
+
+
+@pytest.fixture
+def dummy_config():
+    return {"CONFIG": "2"}
+
+
+@pytest.fixture
+def dummy_file(tmp_path):
+    file = tmp_path / "dummy.md"
+    file.write_text("# Dummy\n")
+    return file
+
+
+def test_constructor_initializes_fields(dummy_file, dummy_preset, dummy_config):
+    preset_name = "mainnet"
+    m2s = MarkdownToSpec(
+        file_name=Path(dummy_file),
+        preset=dummy_preset,
+        config=dummy_config,
+        preset_name=preset_name,
+    )
+    assert m2s.preset == dummy_preset
+    assert m2s.config == dummy_config
+    assert m2s.preset_name == preset_name
+    assert isinstance(m2s.spec, dict)
+    assert isinstance(m2s.all_custom_types, dict)
+    assert hasattr(m2s, "document_iterator")
+    assert m2s.current_heading_name is None
+
+
+def test_run_returns_spec_object(dummy_file, dummy_preset, dummy_config):
+    preset_name = "mainnet"
+    m2s = MarkdownToSpec(
+        file_name=Path(dummy_file),
+        preset=dummy_preset,
+        config=dummy_config,
+        preset_name=preset_name,
+    )
+    spec_obj = m2s.run()
+    # Check that the result is of the expected type
+    from pysetup.typing import SpecObject
+
+    assert isinstance(spec_obj, SpecObject)
+
+
+def test_run_includes_table_in_specobject(tmp_path, dummy_preset, dummy_config):
+    # Create a markdown file with a simple markdown table
+    md_content = """
+# Example
+
+| Name    | Value        | Description       |
+|---------|--------------|------------------|
+| CONST_A | uint64(42)   | Example constant |
+| CONST_B | Bytes32(0x01)| Another constant |
+"""
+    file = tmp_path / "table.md"
+    file.write_text(md_content)
+    m2s = MarkdownToSpec(
+        file_name=Path(file),
+        preset=dummy_preset,
+        config=dummy_config,
+        preset_name="mainnet",
+    )
+    spec_obj = m2s.run()
+    # The constant should be present in the SpecObject's constant_vars
+    assert "CONST_A" in spec_obj.constant_vars
+    assert spec_obj.constant_vars["CONST_A"].type_name == "uint64"
+    assert spec_obj.constant_vars["CONST_A"].value == "42"
+    assert "CONST_B" in spec_obj.constant_vars
+    assert spec_obj.constant_vars["CONST_B"].type_name == "Bytes32"
+    assert spec_obj.constant_vars["CONST_B"].value == "0x01"
+
+
+def test_run_includes_list_of_records_table(tmp_path, dummy_preset, dummy_config):
+    md_content = """
+<!-- list-of-records:blob_schedule -->
+
+| Epoch                       | Max Blobs Per Block | Description                      |
+| --------------------------- | ------------------- | -------------------------------- |
+| `Epoch(269568)` **Deneb**   | `uint64(6)`         | The limit is set to `6` blobs    |
+| `Epoch(364032)` **Electra** | `uint64(9)`         | The limit is raised to `9` blobs |
+"""
+    file = tmp_path / "list_of_records.md"
+    file.write_text(md_content)
+    # The config must have a 'BLOB_SCHEDULE' key with the expected structure for mainnet
+    config = dummy_config.copy()
+    config["BLOB_SCHEDULE"] = [
+        {"EPOCH": "269568", "MAX_BLOBS_PER_BLOCK": "6"},
+        {"EPOCH": "364032", "MAX_BLOBS_PER_BLOCK": "9"},
+    ]
+    m2s = MarkdownToSpec(
+        file_name=Path(file),
+        preset=dummy_preset,
+        config=config,
+        preset_name="mainnet",
+    )
+    spec_obj = m2s.run()
+    # The result should have 'BLOB_SCHEDULE' in config_vars
+    assert "BLOB_SCHEDULE" in spec_obj.config_vars
+    # The value should be a list of dicts with type constructors applied
+    assert isinstance(spec_obj.config_vars["BLOB_SCHEDULE"], list)
+    assert spec_obj.config_vars["BLOB_SCHEDULE"][0]["EPOCH"] == "Epoch(269568)"
+    assert spec_obj.config_vars["BLOB_SCHEDULE"][0]["MAX_BLOBS_PER_BLOCK"] == "uint64(6)"
+    assert spec_obj.config_vars["BLOB_SCHEDULE"][1]["EPOCH"] == "Epoch(364032)"
+    assert spec_obj.config_vars["BLOB_SCHEDULE"][1]["MAX_BLOBS_PER_BLOCK"] == "uint64(9)"
+
+
+def test_run_includes_list_of_records_table_minimal(tmp_path, dummy_preset, dummy_config):
+    md_content = """
+<!-- list-of-records:blob_schedule -->
+
+| Epoch                       | Max Blobs Per Block | Description                      |
+| --------------------------- | ------------------- | -------------------------------- |
+| `Epoch(269568)` **Deneb**   | `uint64(6)`         | The limit is set to `6` blobs    |
+| `Epoch(364032)` **Electra** | `uint64(9)`         | The limit is raised to `9` blobs |
+"""
+    file = tmp_path / "list_of_records_minimal.md"
+    file.write_text(md_content)
+    config = dummy_config.copy()
+    # Use different values than the table for minimal preset
+    config["BLOB_SCHEDULE"] = [
+        {"EPOCH": "2", "MAX_BLOBS_PER_BLOCK": "3"},
+        {"EPOCH": "4", "MAX_BLOBS_PER_BLOCK": "5"},
+    ]
+    m2s = MarkdownToSpec(
+        file_name=Path(file),
+        preset=dummy_preset,
+        config=config,
+        preset_name="minimal",
+    )
+    spec_obj = m2s.run()
+    assert "BLOB_SCHEDULE" in spec_obj.config_vars
+    assert isinstance(spec_obj.config_vars["BLOB_SCHEDULE"], list)
+    # The result should follow the config, not the table
+    assert spec_obj.config_vars["BLOB_SCHEDULE"][0]["EPOCH"] == "Epoch(2)"
+    assert spec_obj.config_vars["BLOB_SCHEDULE"][0]["MAX_BLOBS_PER_BLOCK"] == "uint64(3)"
+    assert spec_obj.config_vars["BLOB_SCHEDULE"][1]["EPOCH"] == "Epoch(4)"
+    assert spec_obj.config_vars["BLOB_SCHEDULE"][1]["MAX_BLOBS_PER_BLOCK"] == "uint64(5)"
+
+
+def test_run_includes_python_function(tmp_path, dummy_preset, dummy_config):
+    md_content = """
+#### `compute_epoch_at_slot`
+
+```python
+def compute_epoch_at_slot(slot: Slot) -> Epoch:
+    \"\"\"
+    Return the epoch number at slot.
+    \"\"\"
+    return Epoch(slot // SLOTS_PER_EPOCH)
+```
+"""
+    file = tmp_path / "function.md"
+    file.write_text(md_content)
+    m2s = MarkdownToSpec(
+        file_name=Path(file),
+        preset=dummy_preset,
+        config=dummy_config,
+        preset_name="mainnet",
+    )
+    spec_obj = m2s.run()
+    # The function should be present in the SpecObject's functions
+    assert "compute_epoch_at_slot" in spec_obj.functions
+    func_src = spec_obj.functions["compute_epoch_at_slot"]
+    assert "def compute_epoch_at_slot(slot: Slot) -> Epoch" in func_src
+    assert "return Epoch(slot // SLOTS_PER_EPOCH)" in func_src
+
+
+def test_run_includes_python_class_container(tmp_path, dummy_preset, dummy_config):
+    md_content = """
+#### `Checkpoint`
+
+```python
+class Checkpoint(Container):
+    epoch: Epoch
+    root: Root
+```
+"""
+    file = tmp_path / "class_container.md"
+    file.write_text(md_content)
+    m2s = MarkdownToSpec(
+        file_name=Path(file),
+        preset=dummy_preset,
+        config=dummy_config,
+        preset_name="mainnet",
+    )
+    spec_obj = m2s.run()
+    # The class should be present in the SpecObject's ssz_objects
+    assert "Checkpoint" in spec_obj.ssz_objects
+    class_src = spec_obj.ssz_objects["Checkpoint"]
+    assert "class Checkpoint(Container):" in class_src
+    assert "epoch: Epoch" in class_src
+    assert "root: Root" in class_src
+
+
+def test_run_includes_python_dataclass(tmp_path, dummy_preset, dummy_config):
+    md_content = """
+## Helpers
+
+### `PayloadAttributes`
+
+Used to signal to initiate the payload build process via `notify_forkchoice_updated`.
+
+```python
+@dataclass
+class PayloadAttributes(object):
+    timestamp: uint64
+    prev_randao: Bytes32
+    suggested_fee_recipient: ExecutionAddress
+```
+"""
+    file = tmp_path / "dataclass.md"
+    file.write_text(md_content)
+    m2s = MarkdownToSpec(
+        file_name=Path(file),
+        preset=dummy_preset,
+        config=dummy_config,
+        preset_name="mainnet",
+    )
+    spec_obj = m2s.run()
+    # The dataclass should be present in the SpecObject's dataclasses
+    assert "PayloadAttributes" in spec_obj.dataclasses
+    class_src = spec_obj.dataclasses["PayloadAttributes"]
+    assert "@dataclass" in class_src
+    assert "class PayloadAttributes(object):" in class_src
+    assert "timestamp: uint64" in class_src
+    assert "prev_randao: Bytes32" in class_src
+    assert "suggested_fee_recipient: ExecutionAddress" in class_src
+
+
+def test_run_skips_predefined_type_rows(tmp_path, dummy_preset, dummy_config):
+    md_content = """
+## Cryptographic types
+
+| Name                                                                                                                                                    | SSZ equivalent                                       | Description                                                  |
+| ------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------- | ------------------------------------------------------------ |
+| [`PolynomialCoeff`](https://github.com/ethereum/consensus-specs/blob/36a5719b78523c057065515c8f8fcaeba75d065b/pysetup/spec_builders/eip7594.py#L20-L24) | `List[BLSFieldElement, FIELD_ELEMENTS_PER_EXT_BLOB]` | <!-- predefined-type --> A polynomial in coefficient form    |
+| [`Coset`](https://github.com/ethereum/consensus-specs/blob/36a5719b78523c057065515c8f8fcaeba75d065b/pysetup/spec_builders/eip7594.py#L27-L33)           | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]`   | <!-- predefined-type --> The evaluation domain of a cell     |
+| [`CosetEvals`](https://github.com/ethereum/consensus-specs/blob/36a5719b78523c057065515c8f8fcaeba75d065b/pysetup/spec_builders/eip7594.py#L36-L42)      | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]`   | <!-- predefined-type --> A cell's evaluations over its coset |
+"""
+    file = tmp_path / "predefined_types.md"
+    file.write_text(md_content)
+    m2s = MarkdownToSpec(
+        file_name=Path(file),
+        preset=dummy_preset,
+        config=dummy_config,
+        preset_name="mainnet",
+    )
+    spec_obj = m2s.run()
+    # These should not be in custom_types or constant_vars due to <!-- predefined-type -->
+    assert "PolynomialCoeff" not in spec_obj.custom_types
+    assert "Coset" not in spec_obj.custom_types
+    assert "CosetEvals" not in spec_obj.custom_types
+    assert "PolynomialCoeff" not in spec_obj.constant_vars
+    assert "Coset" not in spec_obj.constant_vars
+    assert "CosetEvals" not in spec_obj.constant_vars
+
+
+def test_run_skips_eth2spec_skip_code_block(tmp_path, dummy_preset, dummy_config):
+    md_content = """
+## Helpers
+
+### `PayloadAttributes`
+
+Used to signal to initiate the payload build process via `notify_forkchoice_updated`.
+
+<!-- eth2spec: skip -->
+```python
+@dataclass
+class PayloadAttributes(object):
+    timestamp: uint64
+    prev_randao: Bytes32
+    suggested_fee_recipient: ExecutionAddress
+```
+"""
+    file = tmp_path / "dataclass_skip.md"
+    file.write_text(md_content)
+    m2s = MarkdownToSpec(
+        file_name=Path(file),
+        preset=dummy_preset,
+        config=dummy_config,
+        preset_name="mainnet",
+    )
+    spec_obj = m2s.run()
+    # The dataclass should NOT be present in the SpecObject's dataclasses
+    assert "PayloadAttributes" not in spec_obj.dataclasses
+
+
+def test_finalize_types_called_and_updates_custom_types(
+    tmp_path, dummy_preset, dummy_config, monkeypatch
+):
+    # Minimal markdown with a type definition
+    md_content = """
+# Types
+
+| Name             | SSZ equivalent | Description                       |
+| ---------------- | -------------- | --------------------------------- |
+| `Slot`           | `uint64`       | a slot number                     |
+| `Epoch`          | `uint64`       | an epoch number                   |
+"""
+    file = tmp_path / "types.md"
+    file.write_text(md_content)
+    m2s = MarkdownToSpec(
+        file_name=Path(file),
+        preset=dummy_preset,
+        config=dummy_config,
+        preset_name="mainnet",
+    )
+
+    # Spy on _finalize_types
+    called = {}
+    orig_finalize_types = m2s._finalize_types
+
+    def spy_finalize_types():
+        called["ran"] = True
+        return orig_finalize_types()
+
+    monkeypatch.setattr(m2s, "_finalize_types", spy_finalize_types)
+
+    spec_obj = m2s.run()
+    assert called.get("ran") is True
+    # After _finalize_types, custom_types should include 'Slot' and 'Epoch'
+    assert spec_obj.custom_types["Slot"] == "uint64"
+    assert spec_obj.custom_types["Epoch"] == "uint64"

From 42c9d46e7a827d46863a0707438827171b80d198 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Tue, 20 May 2025 11:53:49 +0000
Subject: [PATCH 05/18] Remove stray whitespace

---
 Makefile                       | 2 +-
 pysetup/mk_to_spec.py          | 6 ++----
 tests/infra/test_mk_to_spec.py | 4 +++-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index b53fdaf657..70291413b4 100644
--- a/Makefile
+++ b/Makefile
@@ -248,4 +248,4 @@ kzg_setups: pyspec
 
 # Delete all untracked files.
 clean:
-	rm -fR venv .mypy_cache  build eth2spec.egg-info pysetup/__pycache__ pysetup/spec_builders/__pycache__ 
+	rm -fR venv .mypy_cache  build eth2spec.egg-info pysetup/__pycache__ pysetup/spec_builders/__pycache__
diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py
index fea0b90a2f..7ce65cb54f 100644
--- a/pysetup/mk_to_spec.py
+++ b/pysetup/mk_to_spec.py
@@ -75,8 +75,6 @@ def _skip_element(self) -> None:
         """
         self._get_next_element()
 
-    
-
     def _parse_document(self, file_name: Path) -> Iterator[Element]:
         """
         Opens the markdown file, parses its content into a document object using _parse_markdown,
@@ -123,7 +121,7 @@ def _process_code_block(self, code_block: FencedCode) -> None:
 
         module = ast.parse(source)
         # AST container of the first definition in the block
-        first_def = module.body[0] 
+        first_def = module.body[0]
 
         if isinstance(first_def, ast.FunctionDef):
             self._process_code_def(clean_source, first_def)
@@ -145,7 +143,7 @@ def _process_code_def(self, source: str, fn: ast.FunctionDef) -> None:
         """
 
         self_type_name = _get_self_type_from_source(fn)
-        
+
         if self_type_name is None:
             self.spec["functions"][fn.name] = source
         else:
diff --git a/tests/infra/test_mk_to_spec.py b/tests/infra/test_mk_to_spec.py
index 829c8ff155..a75f5dd2ab 100644
--- a/tests/infra/test_mk_to_spec.py
+++ b/tests/infra/test_mk_to_spec.py
@@ -1,5 +1,7 @@
-import pytest
 from pathlib import Path
+
+import pytest
+
 from pysetup.mk_to_spec import MarkdownToSpec
 
 

From d4116246f6cce7edfbddfe3ad7be0a2c6df02407 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Tue, 20 May 2025 15:05:38 +0000
Subject: [PATCH 06/18] Fix a few more type hints

---
 pysetup/mk_to_spec.py | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/pysetup/mk_to_spec.py b/pysetup/mk_to_spec.py
index 7ce65cb54f..128828439a 100644
--- a/pysetup/mk_to_spec.py
+++ b/pysetup/mk_to_spec.py
@@ -2,7 +2,7 @@
 import json
 from pathlib import Path
 import string
-from typing import Dict, Optional, Tuple, Iterator
+from typing import Dict, Optional, Tuple, Iterator, cast
 import re
 from functools import lru_cache
 
@@ -16,13 +16,13 @@
 from .typing import ProtocolDefinition, VariableDefinition, SpecObject
 
 class MarkdownToSpec:
-    def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name: str):
+    def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str | Dict[str, str]], preset_name: str):
         self.preset = preset
         self.config = config
         self.preset_name = preset_name
 
         # Use a single dict to hold all SpecObject fields
-        self.spec = {
+        self.spec: dict[str, dict] = {
             "functions": {},
             "protocols": {},
             "custom_types": {},
@@ -159,10 +159,10 @@ def _add_protocol_function(self, protocol_name: str, function_name: str, functio
                 functions={})
         self.spec["protocols"][protocol_name].functions[function_name] = function_def
 
-    def _add_dataclass(self, source, cls: ast.ClassDef) -> None:
+    def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None:
         self.spec["dataclasses"][cls.name] = source
 
-    def _process_code_class(self, source, cls: ast.ClassDef) -> None:
+    def _process_code_class(self, source: str, cls: ast.ClassDef) -> None:
         """
         Processes an AST class definition node, validates its consistency with the current heading,
         and updates the spec dictionary with the class source code.
@@ -186,7 +186,7 @@ def _process_code_class(self, source, cls: ast.ClassDef) -> None:
             assert parent_class == "Container"
         self.spec["ssz_objects"][class_name] = source
 
-    def _process_table(self, table: Table):
+    def _process_table(self, table: Table) -> None:
         """
         Handles standard tables (not list-of-records).
         Iterates over rows, extracting variable names, values, and descriptions.
@@ -195,7 +195,7 @@ def _process_table(self, table: Table):
         Handles special cases for predefined types and function-dependent presets.
         """
 
-        for row in table.children:
+        for row in cast(list[TableRow], table.children):
             if len(row.children) < 2:
                 continue
 
@@ -253,7 +253,7 @@ def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]:
         Extracts the name, value, and description fields from a table row element.
         Description can be None.
         """
-        cells = row.children
+        cells = cast(list[TableCell], row.children)
         name_cell = cells[0]
         name = name_cell.children[0].children
 
@@ -275,7 +275,7 @@ def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]:
 
         return name, value, description
 
-    def _process_list_of_records_table(self, child, list_of_records_name):
+    def _process_list_of_records_table(self, table: Table, list_of_records_name: str) -> None:
         """
         Handles tables marked as 'list-of-records'.
         Extracts headers and rows, mapping field names and types.
@@ -292,7 +292,7 @@ def _process_list_of_records_table(self, child, list_of_records_name):
         The method _process_html_block calls this method when it encounters a comment
         of the form `<!-- list-of-records:name -->`.
         """
-        list_of_records_spec = self._extract_list_of_records_spec(child)
+        list_of_records_spec = self._extract_list_of_records_spec(table)
 
         # Make a type map from the spec definition
         type_map = self._make_list_of_records_type_map(list_of_records_spec)
@@ -327,14 +327,14 @@ def _make_list_of_records_type_map(list_of_records: list[dict[str, str]]) -> dic
         return type_map
 
     @staticmethod
-    def _extract_list_of_records_spec(child) -> list[dict[str, str]]:
+    def _extract_list_of_records_spec(table: Table) -> list[dict[str, str]]:
         """
         Extracts the list of records from a table element.
         Returns a list of dicts, each representing a row with field names as keys.
         """
 
         # Save the table header, used for field names (skip last item: description)
-        header_row = child.children[0]
+        header_row = cast(TableRow, table.children[0])
         list_of_records_spec_header = [
             re.sub(r'\s+', '_', value.children[0].children.upper())
             for value in header_row.children[:-1]
@@ -346,7 +346,7 @@ def _extract_list_of_records_spec(child) -> list[dict[str, str]]:
                 list_of_records_spec_header[j]: value.children[0].children
                 for j, value in enumerate(row.children[:-1])
             }
-            for row in child.children[1:]
+            for row in table.children[1:]
         ]
 
         return list_of_records_spec
@@ -370,13 +370,13 @@ def _extract_typed_records_config(
             list_of_records_config_file.append(new_entry)
         return list_of_records_config_file
 
-    def _process_html_block(self, child):
+    def _process_html_block(self, html: HTMLBlock) -> None:
         """
         Handles HTML comments for skip logic and list-of-records detection.
         Sets flags or state variables for the next iteration.
         """
 
-        body = child.body.strip()
+        body = html.body.strip()
 
         # This comment marks that we should skip the next element
         if body == "<!-- eth2spec: skip -->":
@@ -394,7 +394,7 @@ def _process_html_block(self, child):
                     f"expected table after list-of-records comment, got {type(table_element)}")
             self._process_list_of_records_table(table_element, match.group(1).upper())
 
-    def _finalize_types(self):
+    def _finalize_types(self) -> None:
         """
         Processes all_custom_types into custom_types and preset_dep_custom_types.
         Calls helper functions to update KZG and CURDLEPROOFS setups if needed.
@@ -420,7 +420,7 @@ def _finalize_types(self):
             else:
                 self.spec["custom_types"][name] = value
 
-    def _build_spec_object(self):
+    def _build_spec_object(self) -> SpecObject:
         """
         Returns the SpecObject using all collected data.
         """
@@ -505,7 +505,7 @@ def _is_constant_id(name: str) -> bool:
     return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:]))
 
 @lru_cache(maxsize=None)
-def _load_kzg_trusted_setups(preset_name):
+def _load_kzg_trusted_setups(preset_name: str) -> Tuple[list[str], list[str], list[str]]:
     trusted_setups_file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/trusted_setup_4096.json'
 
     with open(trusted_setups_file_path, 'r') as f:
@@ -517,7 +517,7 @@ def _load_kzg_trusted_setups(preset_name):
     return trusted_setup_G1_monomial, trusted_setup_G1_lagrange, trusted_setup_G2_monomial
 
 @lru_cache(maxsize=None)
-def _load_curdleproofs_crs(preset_name):
+def _load_curdleproofs_crs(preset_name: str) -> Dict[str, list[str]]:
     """
     NOTE: File generated from https://github.com/asn-d6/curdleproofs/blob/8e8bf6d4191fb6a844002f75666fb7009716319b/tests/crs.rs#L53-L67
     """
@@ -588,7 +588,7 @@ def parse_markdown(content: str) -> Document:
     return gfm.parse(content)
 
 
-def check_yaml_matches_spec(var_name, yaml, value_def):
+def check_yaml_matches_spec(var_name: str, yaml: Dict[str, str], value_def: VariableDefinition) -> None:
     """
     This function performs a sanity check for presets & configs. To a certain degree, it ensures
     that the values in the specifications match those in the yaml files.

From bdfeaaf866fd5929f4943337a218c3d8743bf8f2 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Wed, 21 May 2025 16:50:00 +0000
Subject: [PATCH 07/18] Split test paths into two lines

---
 Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 70291413b4..090283ddd5 100644
--- a/Makefile
+++ b/Makefile
@@ -112,7 +112,8 @@ test: pyspec
 		$(PRESET) \
 		$(BLS) \
 		--junitxml=$(TEST_REPORT_DIR)/test_results.xml \
-		$(CURDIR)/tests/infra $(PYSPEC_DIR)/eth2spec
+		$(CURDIR)/tests/infra \
+		$(PYSPEC_DIR)/eth2spec
 
 ###############################################################################
 # Coverage

From af4b2b48a087612f7c000bb001c70940faefe3a7 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Wed, 21 May 2025 16:51:28 +0000
Subject: [PATCH 08/18] Rename mk_to_spec to md_to_spec

---
 pysetup/{mk_to_spec.py => md_to_spec.py}               | 0
 tests/infra/{test_mk_to_spec.py => test_md_to_spec.py} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename pysetup/{mk_to_spec.py => md_to_spec.py} (100%)
 rename tests/infra/{test_mk_to_spec.py => test_md_to_spec.py} (100%)

diff --git a/pysetup/mk_to_spec.py b/pysetup/md_to_spec.py
similarity index 100%
rename from pysetup/mk_to_spec.py
rename to pysetup/md_to_spec.py
diff --git a/tests/infra/test_mk_to_spec.py b/tests/infra/test_md_to_spec.py
similarity index 100%
rename from tests/infra/test_mk_to_spec.py
rename to tests/infra/test_md_to_spec.py

From 56c55d3206cfa60028671635e31d178f03dd501c Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Wed, 21 May 2025 16:59:12 +0000
Subject: [PATCH 09/18] Fix imports

---
 setup.py                       | 2 +-
 tests/infra/test_md_to_spec.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 221d20f8bc..1c8e12b5bd 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@
     SpecObject,
     VariableDefinition,
 )
-from pysetup.mk_to_spec import MarkdownToSpec
+from pysetup.md_to_spec import MarkdownToSpec
 
 # Ignore '1.5.0-alpha.*' to '1.5.0a*' messages.
 warnings.filterwarnings('ignore', message='Normalizing .* to .*')
diff --git a/tests/infra/test_md_to_spec.py b/tests/infra/test_md_to_spec.py
index a75f5dd2ab..b8693309ed 100644
--- a/tests/infra/test_md_to_spec.py
+++ b/tests/infra/test_md_to_spec.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from pysetup.mk_to_spec import MarkdownToSpec
+from pysetup.md_to_spec import MarkdownToSpec
 
 
 @pytest.fixture

From 23958cfc26bafc5689b98e4e4da675421d6d9563 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Wed, 21 May 2025 17:05:44 +0000
Subject: [PATCH 10/18] Linting

---
 pysetup/md_to_spec.py | 211 +++++++++++++++++++++++++-----------------
 1 file changed, 127 insertions(+), 84 deletions(-)

diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py
index 128828439a..d5afe7e263 100644
--- a/pysetup/md_to_spec.py
+++ b/pysetup/md_to_spec.py
@@ -1,40 +1,46 @@
 import ast
 import json
-from pathlib import Path
-import string
-from typing import Dict, Optional, Tuple, Iterator, cast
 import re
+import string
 from functools import lru_cache
+from pathlib import Path
+from typing import cast, Dict, Iterator, Optional, Tuple
 
-
-from marko.block import BlankLine, Heading, FencedCode, HTMLBlock, Document
+from marko.block import BlankLine, Document, FencedCode, Heading, HTMLBlock
 from marko.element import Element
 from marko.ext.gfm import gfm
-from marko.ext.gfm.elements import Table, TableRow, TableCell
+from marko.ext.gfm.elements import Table, TableCell, TableRow
 from marko.inline import CodeSpan
 
-from .typing import ProtocolDefinition, VariableDefinition, SpecObject
+from .typing import ProtocolDefinition, SpecObject, VariableDefinition
+
 
 class MarkdownToSpec:
-    def __init__(self, file_name: Path, preset: Dict[str, str], config: Dict[str, str | Dict[str, str]], preset_name: str):
+    def __init__(
+        self,
+        file_name: Path,
+        preset: Dict[str, str],
+        config: Dict[str, str | Dict[str, str]],
+        preset_name: str,
+    ):
         self.preset = preset
         self.config = config
         self.preset_name = preset_name
 
         # Use a single dict to hold all SpecObject fields
         self.spec: dict[str, dict] = {
-            "functions": {},
-            "protocols": {},
-            "custom_types": {},
-            "preset_dep_custom_types": {},
+            "config_vars": {},
             "constant_vars": {},
+            "custom_types": {},
+            "dataclasses": {},
+            "func_dep_presets": {},
+            "functions": {},
             "preset_dep_constant_vars": {},
+            "preset_dep_custom_types": {},
             "preset_vars": {},
-            "config_vars": {},
+            "protocols": {},
             "ssz_dep_constants": {},
-            "func_dep_presets": {},
             "ssz_objects": {},
-            "dataclasses": {},
         }
 
         self.all_custom_types: Dict[str, str] = {}
@@ -61,7 +67,6 @@ def _get_next_element(self) -> Optional[Element]:
         Returns the next element in the document.
         If the end of the document is reached, returns None.
         """
-
         try:
             while isinstance(result := next(self.document_iterator), BlankLine):
                 pass
@@ -149,14 +154,15 @@ def _process_code_def(self, source: str, fn: ast.FunctionDef) -> None:
         else:
             self._add_protocol_function(self_type_name, fn.name, source)
 
-    def _add_protocol_function(self, protocol_name: str, function_name: str, function_def: str) -> None:
+    def _add_protocol_function(
+        self, protocol_name: str, function_name: str, function_def: str
+    ) -> None:
         """
         Adds a function definition to the protocol functions dictionary.
         """
 
         if protocol_name not in self.spec["protocols"]:
-            self.spec["protocols"][protocol_name] = ProtocolDefinition(
-                functions={})
+            self.spec["protocols"][protocol_name] = ProtocolDefinition(functions={})
         self.spec["protocols"][protocol_name].functions[function_name] = function_def
 
     def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None:
@@ -208,7 +214,9 @@ def _process_table(self, table: Table) -> None:
             # If it is not a constant, check if it is a custom type
             if not _is_constant_id(name):
                 # Check for short type declarations
-                if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")):
+                if value.startswith(
+                    ("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")
+                ):
                     self.all_custom_types[name] = value
                 continue
 
@@ -228,21 +236,27 @@ def _process_table(self, table: Table) -> None:
                 if self.preset_name == "mainnet":
                     check_yaml_matches_spec(name, self.preset, value_def)
 
-                self.spec["preset_vars"][name] = VariableDefinition(value_def.type_name, self.preset[name], value_def.comment, None)
+                self.spec["preset_vars"][name] = VariableDefinition(
+                    value_def.type_name, self.preset[name], value_def.comment, None
+                )
 
             # It is a config variable
             elif name in self.config:
                 if self.preset_name == "mainnet":
                     check_yaml_matches_spec(name, self.config, value_def)
 
-                self.spec["config_vars"][name] = VariableDefinition(value_def.type_name, self.config[name], value_def.comment, None)
+                self.spec["config_vars"][name] = VariableDefinition(
+                    value_def.type_name, self.config[name], value_def.comment, None
+                )
 
             # It is a constant variable or a preset_dep_constant_vars
             else:
-                if name in ('ENDIANNESS', 'KZG_ENDIANNESS'):
+                if name in ("ENDIANNESS", "KZG_ENDIANNESS"):
                     # Deal with mypy Literal typing check
-                    value_def = _parse_value(name, value, type_hint='Final')
-                if any(k in value for k in self.preset) or any(k in value for k in self.spec["preset_dep_constant_vars"]):
+                    value_def = _parse_value(name, value, type_hint="Final")
+                if any(k in value for k in self.preset) or any(
+                    k in value for k in self.spec["preset_dep_constant_vars"]
+                ):
                     self.spec["preset_dep_constant_vars"][name] = value_def
                 else:
                     self.spec["constant_vars"][name] = value_def
@@ -305,8 +319,9 @@ def _process_list_of_records_table(self, table: Table, list_of_records_name: str
         # For mainnet, check that the spec config & file config are the same
         # For minimal, we expect this to be different; just use the file config
         if self.preset_name == "mainnet":
-            assert list_of_records_spec == list_of_records_config_file, \
-                f"list of records mismatch: {list_of_records_spec} vs {list_of_records_config_file}"
+            assert (
+                list_of_records_spec == list_of_records_config_file
+            ), f"list of records mismatch: {list_of_records_spec} vs {list_of_records_config_file}"
 
         # Set the config variable
         self.spec["config_vars"][list_of_records_name] = list_of_records_config_file
@@ -318,7 +333,7 @@ def _make_list_of_records_type_map(list_of_records: list[dict[str, str]]) -> dic
         from field name to type name, based on values of the form 'TypeName(...)'.
         """
         type_map: dict[str, str] = {}
-        pattern = re.compile(r'^(\w+)\(.*\)$')
+        pattern = re.compile(r"^(\w+)\(.*\)$")
         for entry in list_of_records:
             for k, v in entry.items():
                 m = pattern.match(v)
@@ -336,7 +351,7 @@ def _extract_list_of_records_spec(table: Table) -> list[dict[str, str]]:
         # Save the table header, used for field names (skip last item: description)
         header_row = cast(TableRow, table.children[0])
         list_of_records_spec_header = [
-            re.sub(r'\s+', '_', value.children[0].children.upper())
+            re.sub(r"\s+", "_", value.children[0].children.upper())
             for value in header_row.children[:-1]
         ]
 
@@ -385,13 +400,13 @@ def _process_html_block(self, html: HTMLBlock) -> None:
         # Handle list-of-records tables
         # This comment marks that the next table is a list-of-records
         # e.g. <!-- list-of-records: <name> -->
-        match = re.match(
-            r"<!--\s*list-of-records:([a-zA-Z0-9_-]+)\s*-->", body)
+        match = re.match(r"<!--\s*list-of-records:([a-zA-Z0-9_-]+)\s*-->", body)
         if match:
             table_element = self._get_next_element()
             if not isinstance(table_element, Table):
                 raise Exception(
-                    f"expected table after list-of-records comment, got {type(table_element)}")
+                    f"expected table after list-of-records comment, got {type(table_element)}"
+                )
             self._process_list_of_records_table(table_element, match.group(1).upper())
 
     def _finalize_types(self) -> None:
@@ -400,13 +415,13 @@ def _finalize_types(self) -> None:
         Calls helper functions to update KZG and CURDLEPROOFS setups if needed.
         """
         # Update KZG trusted setup if needed
-        if any('KZG_SETUP' in name for name in self.spec["constant_vars"]):
+        if any("KZG_SETUP" in name for name in self.spec["constant_vars"]):
             _update_constant_vars_with_kzg_setups(
                 self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name
             )
 
         # Update CURDLEPROOFS CRS if needed
-        if any('CURDLEPROOFS_CRS' in name for name in self.spec["constant_vars"]):
+        if any("CURDLEPROOFS_CRS" in name for name in self.spec["constant_vars"]):
             _update_constant_vars_with_curdleproofs_crs(
                 self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name
             )
@@ -415,7 +430,9 @@ def _finalize_types(self) -> None:
         self.spec["custom_types"] = {}
         self.spec["preset_dep_custom_types"] = {}
         for name, value in self.all_custom_types.items():
-            if any(k in value for k in self.preset) or any(k in value for k in self.spec["preset_dep_constant_vars"]):
+            if any(k in value for k in self.preset) or any(
+                k in value for k in self.spec["preset_dep_constant_vars"]
+            ):
                 self.spec["preset_dep_custom_types"][name] = value
             else:
                 self.spec["custom_types"][name] = value
@@ -425,20 +442,21 @@ def _build_spec_object(self) -> SpecObject:
         Returns the SpecObject using all collected data.
         """
         return SpecObject(
-            functions=self.spec["functions"],
-            protocols=self.spec["protocols"],
-            custom_types=self.spec["custom_types"],
-            preset_dep_custom_types=self.spec["preset_dep_custom_types"],
+            config_vars=self.spec["config_vars"],
             constant_vars=self.spec["constant_vars"],
+            custom_types=self.spec["custom_types"],
+            dataclasses=self.spec["dataclasses"],
+            func_dep_presets=self.spec["func_dep_presets"],
+            functions=self.spec["functions"],
             preset_dep_constant_vars=self.spec["preset_dep_constant_vars"],
+            preset_dep_custom_types=self.spec["preset_dep_custom_types"],
             preset_vars=self.spec["preset_vars"],
-            config_vars=self.spec["config_vars"],
+            protocols=self.spec["protocols"],
             ssz_dep_constants=self.spec["ssz_dep_constants"],
-            func_dep_presets=self.spec["func_dep_presets"],
             ssz_objects=self.spec["ssz_objects"],
-            dataclasses=self.spec["dataclasses"],
         )
 
+
 @lru_cache(maxsize=None)
 def _get_name_from_heading(heading: Heading) -> Optional[str]:
     last_child = heading.children[-1]
@@ -465,7 +483,7 @@ def _get_self_type_from_source(fn: ast.FunctionDef) -> Optional[str]:
     args = fn.args.args
     if len(args) == 0:
         return None
-    if args[0].arg != 'self':
+    if args[0].arg != "self":
         return None
     if args[0].annotation is None:
         return None
@@ -500,86 +518,104 @@ def _is_constant_id(name: str) -> bool:
         bool: True if the name is a valid constant identifier, False otherwise.
     """
 
-    if name[0] not in string.ascii_uppercase + '_':
+    if name[0] not in string.ascii_uppercase + "_":
         return False
-    return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:]))
+    return all(map(lambda c: c in string.ascii_uppercase + "_" + string.digits, name[1:]))
+
 
 @lru_cache(maxsize=None)
 def _load_kzg_trusted_setups(preset_name: str) -> Tuple[list[str], list[str], list[str]]:
-    trusted_setups_file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/trusted_setup_4096.json'
+    trusted_setups_file_path = (
+        str(Path(__file__).parent.parent)
+        + "/presets/"
+        + preset_name
+        + "/trusted_setups/trusted_setup_4096.json"
+    )
 
-    with open(trusted_setups_file_path, 'r') as f:
+    with open(trusted_setups_file_path, "r") as f:
         json_data = json.load(f)
-        trusted_setup_G1_monomial = json_data['g1_monomial']
-        trusted_setup_G1_lagrange = json_data['g1_lagrange']
-        trusted_setup_G2_monomial = json_data['g2_monomial']
+        trusted_setup_G1_monomial = json_data["g1_monomial"]
+        trusted_setup_G1_lagrange = json_data["g1_lagrange"]
+        trusted_setup_G2_monomial = json_data["g2_monomial"]
 
     return trusted_setup_G1_monomial, trusted_setup_G1_lagrange, trusted_setup_G2_monomial
 
+
 @lru_cache(maxsize=None)
 def _load_curdleproofs_crs(preset_name: str) -> Dict[str, list[str]]:
     """
     NOTE: File generated from https://github.com/asn-d6/curdleproofs/blob/8e8bf6d4191fb6a844002f75666fb7009716319b/tests/crs.rs#L53-L67
     """
-    file_path = str(Path(__file__).parent.parent) + '/presets/' + preset_name + '/trusted_setups/curdleproofs_crs.json'
+    file_path = (
+        str(Path(__file__).parent.parent)
+        + "/presets/"
+        + preset_name
+        + "/trusted_setups/curdleproofs_crs.json"
+    )
 
-    with open(file_path, 'r') as f:
+    with open(file_path, "r") as f:
         json_data = json.load(f)
 
     return json_data
 
 
 ALL_KZG_SETUPS = {
-    'minimal': _load_kzg_trusted_setups('minimal'),
-    'mainnet': _load_kzg_trusted_setups('mainnet')
+    "minimal": _load_kzg_trusted_setups("minimal"),
+    "mainnet": _load_kzg_trusted_setups("mainnet"),
 }
 
 ALL_CURDLEPROOFS_CRS = {
-    'minimal': _load_curdleproofs_crs('minimal'),
-    'mainnet': _load_curdleproofs_crs('mainnet'),
+    "minimal": _load_curdleproofs_crs("minimal"),
+    "mainnet": _load_curdleproofs_crs("mainnet"),
 }
 
+
 @lru_cache(maxsize=None)
-def _parse_value(name: str, typed_value: str, type_hint: Optional[str] = None) -> VariableDefinition:
+def _parse_value(
+    name: str, typed_value: str, type_hint: Optional[str] = None
+) -> VariableDefinition:
     comment = None
     if name in ("ROOT_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_REDUCED"):
         comment = "noqa: E501"
 
     typed_value = typed_value.strip()
-    if '(' not in typed_value:
-        return VariableDefinition(type_name=None, value=typed_value, comment=comment, type_hint=type_hint)
-    i = typed_value.index('(')
+    if "(" not in typed_value:
+        return VariableDefinition(
+            type_name=None, value=typed_value, comment=comment, type_hint=type_hint
+        )
+    i = typed_value.index("(")
     type_name = typed_value[:i]
 
-    return VariableDefinition(type_name=type_name, value=typed_value[i+1:-1], comment=comment, type_hint=type_hint)
+    return VariableDefinition(
+        type_name=type_name, value=typed_value[i + 1 : -1], comment=comment, type_hint=type_hint
+    )
 
 
 def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name):
     comment = "noqa: E501"
     kzg_setups = ALL_KZG_SETUPS[preset_name]
-    preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'] = VariableDefinition(
-        preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'].value,
-        str(kzg_setups[0]),
-        comment, None
+    preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"] = VariableDefinition(
+        preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str(kzg_setups[0]), comment, None
     )
-    preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'] = VariableDefinition(
-        preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'].value,
-        str(kzg_setups[1]),
-        comment, None
+    preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"] = VariableDefinition(
+        preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str(kzg_setups[1]), comment, None
     )
-    constant_vars['KZG_SETUP_G2_MONOMIAL'] = VariableDefinition(
-        constant_vars['KZG_SETUP_G2_MONOMIAL'].value,
-        str(kzg_setups[2]),
-        comment, None
+    constant_vars["KZG_SETUP_G2_MONOMIAL"] = VariableDefinition(
+        constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str(kzg_setups[2]), comment, None
     )
 
 
-def _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_constant_vars, preset_name):
+def _update_constant_vars_with_curdleproofs_crs(
+    constant_vars, preset_dep_constant_vars, preset_name
+):
     comment = "noqa: E501"
-    constant_vars['CURDLEPROOFS_CRS'] = VariableDefinition(
+    constant_vars["CURDLEPROOFS_CRS"] = VariableDefinition(
+        None,
+        "curdleproofs.CurdleproofsCrs.from_json(json.dumps("
+        + str(ALL_CURDLEPROOFS_CRS[str(preset_name)]).replace("0x", "")
+        + "))",
+        comment,
         None,
-        'curdleproofs.CurdleproofsCrs.from_json(json.dumps(' + str(ALL_CURDLEPROOFS_CRS[str(preset_name)]).replace('0x', '') + '))',
-        comment, None
     )
 
 
@@ -588,7 +624,9 @@ def parse_markdown(content: str) -> Document:
     return gfm.parse(content)
 
 
-def check_yaml_matches_spec(var_name: str, yaml: Dict[str, str], value_def: VariableDefinition) -> None:
+def check_yaml_matches_spec(
+    var_name: str, yaml: Dict[str, str], value_def: VariableDefinition
+) -> None:
     """
     This function performs a sanity check for presets & configs. To a certain degree, it ensures
     that the values in the specifications match those in the yaml files.
@@ -604,17 +642,22 @@ def check_yaml_matches_spec(var_name: str, yaml: Dict[str, str], value_def: Vari
         if var in updated_value:
             updated_value = updated_value.replace(var, yaml[var])
     try:
-        assert yaml[var_name] == repr(eval(updated_value)), \
-            f"mismatch for {var_name}: {yaml[var_name]} vs {eval(updated_value)}"
+        assert yaml[var_name] == repr(
+            eval(updated_value)
+        ), f"mismatch for {var_name}: {yaml[var_name]} vs {eval(updated_value)}"
     except NameError:
         # Okay it's probably something more serious, let's ignore
         pass
 
+
 def _has_decorator(decorateable: ast.ClassDef | ast.FunctionDef, name: str) -> bool:
     return any(_is_decorator(d, name) for d in decorateable.decorator_list)
 
+
 def _is_decorator(decorator: ast.expr, name: str) -> bool:
-    return (isinstance(decorator, ast.Name) and decorator.id == name) or \
-            (isinstance(decorator, ast.Attribute) and decorator.attr == name) or \
-            (isinstance(decorator, ast.Call) and decorator.func.id == name) or \
-            (isinstance(decorator, ast.Subscript) and decorator.value.id == name)
+    return (
+        (isinstance(decorator, ast.Name) and decorator.id == name)
+        or (isinstance(decorator, ast.Attribute) and decorator.attr == name)
+        or (isinstance(decorator, ast.Call) and decorator.func.id == name)
+        or (isinstance(decorator, ast.Subscript) and decorator.value.id == name)
+    )

From a003052939226ea870c2f31010e39301c8802f7f Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Thu, 22 May 2025 05:11:50 +0000
Subject: [PATCH 11/18] Remove blank line

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 1c8e12b5bd..5db4ab1858 100644
--- a/setup.py
+++ b/setup.py
@@ -682,4 +682,3 @@ def run(self):
     py_modules=["eth2spec"],
     cmdclass=commands,
 )
-

From 7d660336721200063a54f057a14649ed7adda846 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Thu, 22 May 2025 07:17:01 +0000
Subject: [PATCH 12/18] Fix docstrings and some typing

---
 pysetup/md_to_spec.py | 116 ++++++++++++++++--------------------------
 1 file changed, 43 insertions(+), 73 deletions(-)

diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py
index d5afe7e263..69c2fecfc4 100644
--- a/pysetup/md_to_spec.py
+++ b/pysetup/md_to_spec.py
@@ -4,7 +4,7 @@
 import string
 from functools import lru_cache
 from pathlib import Path
-from typing import cast, Dict, Iterator, Optional, Tuple
+from typing import cast, Dict, Iterator, Mapping, Optional, Tuple
 
 from marko.block import BlankLine, Document, FencedCode, Heading, HTMLBlock
 from marko.element import Element
@@ -19,10 +19,13 @@ class MarkdownToSpec:
     def __init__(
         self,
         file_name: Path,
-        preset: Dict[str, str],
-        config: Dict[str, str | Dict[str, str]],
+        preset: dict[str, str],
+        config: dict[str, str | list[dict[str, str]]],
         preset_name: str,
     ):
+        """
+        Initializes the MarkdownToSpec instance.
+        """
         self.preset = preset
         self.config = config
         self.preset_name = preset_name
@@ -50,12 +53,7 @@ def __init__(
 
     def run(self) -> SpecObject:
         """
-        Orchestrates the parsing and processing of the markdown spec file.
-        - Calls _parse_document()
-        - Iterates over self.document.children and processes each child
-        - Calls _finalize_types() and _build_spec_object() after processing
-        Returns:
-            SpecObject: The constructed specification object.
+        Parses the markdown spec file and returns the SpecObject.
         """
         while (child := self._get_next_element()) is not None:
             self._process_child(child)
@@ -64,8 +62,7 @@ def run(self) -> SpecObject:
 
     def _get_next_element(self) -> Optional[Element]:
         """
-        Returns the next element in the document.
-        If the end of the document is reached, returns None.
+        Returns the next non-blank element in the document.
         """
         try:
             while isinstance(result := next(self.document_iterator), BlankLine):
@@ -82,14 +79,15 @@ def _skip_element(self) -> None:
 
     def _parse_document(self, file_name: Path) -> Iterator[Element]:
         """
-        Opens the markdown file, parses its content into a document object using _parse_markdown,
-        and stores the parsed document in self.document.
+        Parses the markdown file into document elements.
         """
         with open(file_name) as source_file:
             document = parse_markdown(source_file.read())
             return iter(document.children)
 
     def _process_child(self, child: Element) -> None:
+        """Processes a child Markdown element by dispatching to the appropriate handler based on its type."""
+
         # Skip blank lines
         if isinstance(child, BlankLine):
             return
@@ -113,10 +111,8 @@ def _process_heading(self, heading: Heading) -> None:
 
     def _process_code_block(self, code_block: FencedCode) -> None:
         """
-        Processes a FencedCode block:
-        - Checks if the code block is Python.
+        Processes a FencedCode block, ignoring non-Python code.
         - Extracts source code and determines if it is a function, dataclass, or class.
-        - Updates the appropriate dictionary (functions, protocols, dataclasses, ssz_objects).
         """
         if code_block.lang != "python":
             return
@@ -139,14 +135,8 @@ def _process_code_block(self, code_block: FencedCode) -> None:
 
     def _process_code_def(self, source: str, fn: ast.FunctionDef) -> None:
         """
-        Processes a function definition node from the AST and stores its source code representation.
-        If the function is a method (i.e., has a self type), it is added to the protocol functions for that type.
-        Otherwise, it is stored as a standalone function.
-        Args:
-            source (str): The source code of the function definition.
-            fn (ast.FunctionDef): The AST node representing the function definition.
+        Processes a function definition and stores it in the spec.
         """
-
         self_type_name = _get_self_type_from_source(fn)
 
         if self_type_name is None:
@@ -160,7 +150,6 @@ def _add_protocol_function(
         """
         Adds a function definition to the protocol functions dictionary.
         """
-
         if protocol_name not in self.spec["protocols"]:
             self.spec["protocols"][protocol_name] = ProtocolDefinition(functions={})
         self.spec["protocols"][protocol_name].functions[function_name] = function_def
@@ -170,18 +159,8 @@ def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None:
 
     def _process_code_class(self, source: str, cls: ast.ClassDef) -> None:
         """
-        Processes an AST class definition node, validates its consistency with the current heading,
-        and updates the spec dictionary with the class source code.
-        Args:
-            source (str): The source code of the class.
-            cls (ast.ClassDef): The AST node representing the class definition.
-        Raises:
-            Exception: If the class name does not match the current heading name.
-            AssertionError: If the parent class is not 'Container' when a parent class is present.
-        Side Effects:
-            Updates self.spec["ssz_objects"] with the class source code, keyed by class name.
+        Processes a class definition and updates the spec.
         """
-
         class_name, parent_class = _get_class_info_from_ast(cls)
 
         # check consistency with spec
@@ -194,13 +173,8 @@ def _process_code_class(self, source: str, cls: ast.ClassDef) -> None:
 
     def _process_table(self, table: Table) -> None:
         """
-        Handles standard tables (not list-of-records).
-        Iterates over rows, extracting variable names, values, and descriptions.
-        Determines if the variable is a constant, preset, config, or custom type.
-        Updates the corresponding dictionaries.
-        Handles special cases for predefined types and function-dependent presets.
+        Processes a table and updates the spec with its data.
         """
-
         for row in cast(list[TableRow], table.children):
             if len(row.children) < 2:
                 continue
@@ -245,9 +219,13 @@ def _process_table(self, table: Table) -> None:
                 if self.preset_name == "mainnet":
                     check_yaml_matches_spec(name, self.config, value_def)
 
-                self.spec["config_vars"][name] = VariableDefinition(
-                    value_def.type_name, self.config[name], value_def.comment, None
-                )
+                config_value = self.config[name]
+                if isinstance(config_value, str):
+                    self.spec["config_vars"][name] = VariableDefinition(
+                        value_def.type_name, config_value, value_def.comment, None
+                    )
+                else:
+                    raise ValueError(f"Variable {name} should be a string in the config file.")
 
             # It is a constant variable or a preset_dep_constant_vars
             else:
@@ -265,7 +243,6 @@ def _process_table(self, table: Table) -> None:
     def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]:
         """
         Extracts the name, value, and description fields from a table row element.
-        Description can be None.
         """
         cells = cast(list[TableCell], row.children)
         name_cell = cells[0]
@@ -292,9 +269,6 @@ def _get_table_row_fields(row: TableRow) -> tuple[str, str, Optional[str]]:
     def _process_list_of_records_table(self, table: Table, list_of_records_name: str) -> None:
         """
         Handles tables marked as 'list-of-records'.
-        Extracts headers and rows, mapping field names and types.
-        Applies type mapping to config entries.
-        Validates or updates the config variable as needed based on preset_name.
         Updates config_vars with the processed list.
 
         Example of input:
@@ -374,7 +348,11 @@ def _extract_typed_records_config(
         Returns a new list of dicts with types applied.
         """
         list_of_records_config_file: list[dict[str, str]] = []
-        for entry in self.config[list_of_records_name]:
+        entries = self.config[list_of_records_name]
+        if not isinstance(entries, list):
+            raise ValueError(f"Expected a dict for {list_of_records_name} in config file")
+
+        for entry in entries:
             new_entry = {}
             for k, v in entry.items():
                 ctor = type_map.get(k)
@@ -390,7 +368,6 @@ def _process_html_block(self, html: HTMLBlock) -> None:
         Handles HTML comments for skip logic and list-of-records detection.
         Sets flags or state variables for the next iteration.
         """
-
         body = html.body.strip()
 
         # This comment marks that we should skip the next element
@@ -423,7 +400,7 @@ def _finalize_types(self) -> None:
         # Update CURDLEPROOFS CRS if needed
         if any("CURDLEPROOFS_CRS" in name for name in self.spec["constant_vars"]):
             _update_constant_vars_with_curdleproofs_crs(
-                self.spec["constant_vars"], self.spec["preset_dep_constant_vars"], self.preset_name
+                self.spec["constant_vars"], self.preset_name
             )
 
         # Split all_custom_types into custom_types and preset_dep_custom_types
@@ -470,14 +447,6 @@ def _get_source_from_code_block(block: FencedCode) -> str:
     return block.children[0].children.strip()
 
 
-@lru_cache(maxsize=None)
-def _get_function_name_from_source(source: str) -> str:
-    fn = ast.parse(source).body[0]
-    if not isinstance(fn, ast.FunctionDef):
-        raise Exception("expected function definition")
-    return fn.name
-
-
 @lru_cache(maxsize=None)
 def _get_self_type_from_source(fn: ast.FunctionDef) -> Optional[str]:
     args = fn.args.args
@@ -508,16 +477,8 @@ def _get_class_info_from_ast(cls: ast.ClassDef) -> Tuple[str, Optional[str]]:
 @lru_cache(maxsize=None)
 def _is_constant_id(name: str) -> bool:
     """
-    Check if the given name follows the convention for constant identifiers.
-    A valid constant identifier must:
-    - Start with an uppercase ASCII letter or an underscore ('_').
-    - All subsequent characters (if any) must be uppercase ASCII letters, underscores, or digits.
-    Args:
-        name (str): The identifier name to check.
-    Returns:
-        bool: True if the name is a valid constant identifier, False otherwise.
+    Checks if the given name follows the convention for constant identifiers.
     """
-
     if name[0] not in string.ascii_uppercase + "_":
         return False
     return all(map(lambda c: c in string.ascii_uppercase + "_" + string.digits, name[1:]))
@@ -591,7 +552,11 @@ def _parse_value(
     )
 
 
-def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name):
+def _update_constant_vars_with_kzg_setups(
+    constant_vars: dict[str, VariableDefinition],
+    preset_dep_constant_vars: dict[str, VariableDefinition],
+    preset_name: str,
+) -> None:
     comment = "noqa: E501"
     kzg_setups = ALL_KZG_SETUPS[preset_name]
     preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"] = VariableDefinition(
@@ -606,8 +571,8 @@ def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_var
 
 
 def _update_constant_vars_with_curdleproofs_crs(
-    constant_vars, preset_dep_constant_vars, preset_name
-):
+    constant_vars: dict[str, VariableDefinition], preset_name: str
+) -> None:
     comment = "noqa: E501"
     constant_vars["CURDLEPROOFS_CRS"] = VariableDefinition(
         None,
@@ -625,7 +590,7 @@ def parse_markdown(content: str) -> Document:
 
 
 def check_yaml_matches_spec(
-    var_name: str, yaml: Dict[str, str], value_def: VariableDefinition
+    var_name: str, yaml: Mapping[str, str | list[dict[str, str]]], value_def: VariableDefinition
 ) -> None:
     """
     This function performs a sanity check for presets & configs. To a certain degree, it ensures
@@ -640,7 +605,12 @@ def check_yaml_matches_spec(
     updated_value = value_def.value
     for var in sorted(yaml.keys(), reverse=True):
         if var in updated_value:
-            updated_value = updated_value.replace(var, yaml[var])
+            value = yaml[var]
+            if isinstance(value, str):
+                updated_value = updated_value.replace(var, value)
+
+            else:
+                raise ValueError(f"Variable {var} should be a string in the yaml file.")
     try:
         assert yaml[var_name] == repr(
             eval(updated_value)

From 9ea94a7148f328063828999114da64db392eb36c Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Fri, 23 May 2025 11:25:34 +0000
Subject: [PATCH 13/18] Restore makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 13098cd91b..16ce32e45a 100644
--- a/Makefile
+++ b/Makefile
@@ -249,4 +249,4 @@ kzg_setups: pyspec
 
 # Delete all untracked files.
 clean:
-	rm -fR venv .mypy_cache  build eth2spec.egg-info pysetup/__pycache__ pysetup/spec_builders/__pycache__
+	@git clean -fdX

From 035a3164fc4a83188ceb6907a0067cd521a446db Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Fri, 23 May 2025 11:28:29 +0000
Subject: [PATCH 14/18] Use fruit example in list-of-records docstring

---
 pysetup/md_to_spec.py | 48 ++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py
index 69c2fecfc4..dc82998bdf 100644
--- a/pysetup/md_to_spec.py
+++ b/pysetup/md_to_spec.py
@@ -48,7 +48,8 @@ def __init__(
 
         self.all_custom_types: Dict[str, str] = {}
 
-        self.document_iterator: Iterator[Element] = self._parse_document(file_name)
+        self.document_iterator: Iterator[Element] = self._parse_document(
+            file_name)
         self.current_heading_name: str | None = None
 
     def run(self) -> SpecObject:
@@ -151,7 +152,8 @@ def _add_protocol_function(
         Adds a function definition to the protocol functions dictionary.
         """
         if protocol_name not in self.spec["protocols"]:
-            self.spec["protocols"][protocol_name] = ProtocolDefinition(functions={})
+            self.spec["protocols"][protocol_name] = ProtocolDefinition(
+                functions={})
         self.spec["protocols"][protocol_name].functions[function_name] = function_def
 
     def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None:
@@ -165,7 +167,8 @@ def _process_code_class(self, source: str, cls: ast.ClassDef) -> None:
 
         # check consistency with spec
         if class_name != self.current_heading_name:
-            raise Exception(f"class_name {class_name} != current_name {self.current_heading_name}")
+            raise Exception(
+                f"class_name {class_name} != current_name {self.current_heading_name}")
 
         if parent_class:
             assert parent_class == "Container"
@@ -189,7 +192,8 @@ def _process_table(self, table: Table) -> None:
             if not _is_constant_id(name):
                 # Check for short type declarations
                 if value.startswith(
-                    ("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")
+                    ("uint", "Bytes", "ByteList", "Union",
+                     "Vector", "List", "ByteVector")
                 ):
                     self.all_custom_types[name] = value
                 continue
@@ -225,7 +229,8 @@ def _process_table(self, table: Table) -> None:
                         value_def.type_name, config_value, value_def.comment, None
                     )
                 else:
-                    raise ValueError(f"Variable {name} should be a string in the config file.")
+                    raise ValueError(
+                        f"Variable {name} should be a string in the config file.")
 
             # It is a constant variable or a preset_dep_constant_vars
             else:
@@ -272,10 +277,11 @@ def _process_list_of_records_table(self, table: Table, list_of_records_name: str
         Updates config_vars with the processed list.
 
         Example of input:
-            | Epoch                       | Max Blobs Per Block | Description                      |
-            | --------------------------- | ------------------- | -------------------------------- |
-            | `Epoch(269568)` **Deneb**   | `uint64(6)`         | The limit is set to `6` blobs    |
-            | `Epoch(364032)` **Electra** | `uint64(9)`         | The limit is raised to `9` blobs |
+            | Name   | Calories      | Description   |
+            | ------ | ------------- | ------------- |
+            | Apple  | `uint64(96)`  | 5.3oz serving |
+            | Orange | `uint64(75)`  | 5.6oz serving |
+            | Banana | `uint64(111)` | 4.4oz serving |
 
         The method _process_html_block calls this method when it encounters a comment
         of the form `<!-- list-of-records:name -->`.
@@ -350,7 +356,8 @@ def _extract_typed_records_config(
         list_of_records_config_file: list[dict[str, str]] = []
         entries = self.config[list_of_records_name]
         if not isinstance(entries, list):
-            raise ValueError(f"Expected a dict for {list_of_records_name} in config file")
+            raise ValueError(
+                f"Expected a dict for {list_of_records_name} in config file")
 
         for entry in entries:
             new_entry = {}
@@ -377,14 +384,16 @@ def _process_html_block(self, html: HTMLBlock) -> None:
         # Handle list-of-records tables
         # This comment marks that the next table is a list-of-records
         # e.g. <!-- list-of-records: <name> -->
-        match = re.match(r"<!--\s*list-of-records:([a-zA-Z0-9_-]+)\s*-->", body)
+        match = re.match(
+            r"<!--\s*list-of-records:([a-zA-Z0-9_-]+)\s*-->", body)
         if match:
             table_element = self._get_next_element()
             if not isinstance(table_element, Table):
                 raise Exception(
                     f"expected table after list-of-records comment, got {type(table_element)}"
                 )
-            self._process_list_of_records_table(table_element, match.group(1).upper())
+            self._process_list_of_records_table(
+                table_element, match.group(1).upper())
 
     def _finalize_types(self) -> None:
         """
@@ -548,7 +557,8 @@ def _parse_value(
     type_name = typed_value[:i]
 
     return VariableDefinition(
-        type_name=type_name, value=typed_value[i + 1 : -1], comment=comment, type_hint=type_hint
+        type_name=type_name, value=typed_value[i +
+                                               1: -1], comment=comment, type_hint=type_hint
     )
 
 
@@ -560,13 +570,16 @@ def _update_constant_vars_with_kzg_setups(
     comment = "noqa: E501"
     kzg_setups = ALL_KZG_SETUPS[preset_name]
     preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"] = VariableDefinition(
-        preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str(kzg_setups[0]), comment, None
+        preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str(
+            kzg_setups[0]), comment, None
     )
     preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"] = VariableDefinition(
-        preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str(kzg_setups[1]), comment, None
+        preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str(
+            kzg_setups[1]), comment, None
     )
     constant_vars["KZG_SETUP_G2_MONOMIAL"] = VariableDefinition(
-        constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str(kzg_setups[2]), comment, None
+        constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str(
+            kzg_setups[2]), comment, None
     )
 
 
@@ -610,7 +623,8 @@ def check_yaml_matches_spec(
                 updated_value = updated_value.replace(var, value)
 
             else:
-                raise ValueError(f"Variable {var} should be a string in the yaml file.")
+                raise ValueError(
+                    f"Variable {var} should be a string in the yaml file.")
     try:
         assert yaml[var_name] == repr(
             eval(updated_value)

From 3baab739e447c7d1ceba09f032b3510a1f2c1aa2 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Sat, 24 May 2025 05:55:08 +0000
Subject: [PATCH 15/18] Fix parameter for git clean

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 16ce32e45a..f982fcf15d 100644
--- a/Makefile
+++ b/Makefile
@@ -249,4 +249,4 @@ kzg_setups: pyspec
 
 # Delete all untracked files.
 clean:
-	@git clean -fdX
+	@git clean -fdx

From 9f4ab4a14af0cec976a2e6d491de7307210093e4 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Sat, 24 May 2025 05:58:11 +0000
Subject: [PATCH 16/18] Linter

---
 pysetup/md_to_spec.py | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py
index dc82998bdf..706bf3f27f 100644
--- a/pysetup/md_to_spec.py
+++ b/pysetup/md_to_spec.py
@@ -48,8 +48,7 @@ def __init__(
 
         self.all_custom_types: Dict[str, str] = {}
 
-        self.document_iterator: Iterator[Element] = self._parse_document(
-            file_name)
+        self.document_iterator: Iterator[Element] = self._parse_document(file_name)
         self.current_heading_name: str | None = None
 
     def run(self) -> SpecObject:
@@ -152,8 +151,7 @@ def _add_protocol_function(
         Adds a function definition to the protocol functions dictionary.
         """
         if protocol_name not in self.spec["protocols"]:
-            self.spec["protocols"][protocol_name] = ProtocolDefinition(
-                functions={})
+            self.spec["protocols"][protocol_name] = ProtocolDefinition(functions={})
         self.spec["protocols"][protocol_name].functions[function_name] = function_def
 
     def _add_dataclass(self, source: str, cls: ast.ClassDef) -> None:
@@ -167,8 +165,7 @@ def _process_code_class(self, source: str, cls: ast.ClassDef) -> None:
 
         # check consistency with spec
         if class_name != self.current_heading_name:
-            raise Exception(
-                f"class_name {class_name} != current_name {self.current_heading_name}")
+            raise Exception(f"class_name {class_name} != current_name {self.current_heading_name}")
 
         if parent_class:
             assert parent_class == "Container"
@@ -192,8 +189,7 @@ def _process_table(self, table: Table) -> None:
             if not _is_constant_id(name):
                 # Check for short type declarations
                 if value.startswith(
-                    ("uint", "Bytes", "ByteList", "Union",
-                     "Vector", "List", "ByteVector")
+                    ("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")
                 ):
                     self.all_custom_types[name] = value
                 continue
@@ -229,8 +225,7 @@ def _process_table(self, table: Table) -> None:
                         value_def.type_name, config_value, value_def.comment, None
                     )
                 else:
-                    raise ValueError(
-                        f"Variable {name} should be a string in the config file.")
+                    raise ValueError(f"Variable {name} should be a string in the config file.")
 
             # It is a constant variable or a preset_dep_constant_vars
             else:
@@ -356,8 +351,7 @@ def _extract_typed_records_config(
         list_of_records_config_file: list[dict[str, str]] = []
         entries = self.config[list_of_records_name]
         if not isinstance(entries, list):
-            raise ValueError(
-                f"Expected a dict for {list_of_records_name} in config file")
+            raise ValueError(f"Expected a dict for {list_of_records_name} in config file")
 
         for entry in entries:
             new_entry = {}
@@ -384,16 +378,14 @@ def _process_html_block(self, html: HTMLBlock) -> None:
         # Handle list-of-records tables
         # This comment marks that the next table is a list-of-records
         # e.g. <!-- list-of-records: <name> -->
-        match = re.match(
-            r"<!--\s*list-of-records:([a-zA-Z0-9_-]+)\s*-->", body)
+        match = re.match(r"<!--\s*list-of-records:([a-zA-Z0-9_-]+)\s*-->", body)
         if match:
             table_element = self._get_next_element()
             if not isinstance(table_element, Table):
                 raise Exception(
                     f"expected table after list-of-records comment, got {type(table_element)}"
                 )
-            self._process_list_of_records_table(
-                table_element, match.group(1).upper())
+            self._process_list_of_records_table(table_element, match.group(1).upper())
 
     def _finalize_types(self) -> None:
         """
@@ -557,8 +549,7 @@ def _parse_value(
     type_name = typed_value[:i]
 
     return VariableDefinition(
-        type_name=type_name, value=typed_value[i +
-                                               1: -1], comment=comment, type_hint=type_hint
+        type_name=type_name, value=typed_value[i + 1 : -1], comment=comment, type_hint=type_hint
     )
 
 
@@ -570,16 +561,13 @@ def _update_constant_vars_with_kzg_setups(
     comment = "noqa: E501"
     kzg_setups = ALL_KZG_SETUPS[preset_name]
     preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"] = VariableDefinition(
-        preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str(
-            kzg_setups[0]), comment, None
+        preset_dep_constant_vars["KZG_SETUP_G1_MONOMIAL"].value, str(kzg_setups[0]), comment, None
     )
     preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"] = VariableDefinition(
-        preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str(
-            kzg_setups[1]), comment, None
+        preset_dep_constant_vars["KZG_SETUP_G1_LAGRANGE"].value, str(kzg_setups[1]), comment, None
     )
     constant_vars["KZG_SETUP_G2_MONOMIAL"] = VariableDefinition(
-        constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str(
-            kzg_setups[2]), comment, None
+        constant_vars["KZG_SETUP_G2_MONOMIAL"].value, str(kzg_setups[2]), comment, None
     )
 
 
@@ -623,8 +611,7 @@ def check_yaml_matches_spec(
                 updated_value = updated_value.replace(var, value)
 
             else:
-                raise ValueError(
-                    f"Variable {var} should be a string in the yaml file.")
+                raise ValueError(f"Variable {var} should be a string in the yaml file.")
     try:
         assert yaml[var_name] == repr(
             eval(updated_value)

From 6595bdbf5b11f15dc486a0bdccf8a6009907aba3 Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Tue, 27 May 2025 06:18:53 +0000
Subject: [PATCH 17/18] Reorder attribute initialization in __init__

---
 pysetup/md_to_spec.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pysetup/md_to_spec.py b/pysetup/md_to_spec.py
index 706bf3f27f..be0b3fc323 100644
--- a/pysetup/md_to_spec.py
+++ b/pysetup/md_to_spec.py
@@ -30,6 +30,10 @@ def __init__(
         self.config = config
         self.preset_name = preset_name
 
+        self.document_iterator: Iterator[Element] = self._parse_document(file_name)
+        self.all_custom_types: Dict[str, str] = {}
+        self.current_heading_name: str | None = None
+
         # Use a single dict to hold all SpecObject fields
         self.spec: dict[str, dict] = {
             "config_vars": {},
@@ -46,11 +50,6 @@ def __init__(
             "ssz_objects": {},
         }
 
-        self.all_custom_types: Dict[str, str] = {}
-
-        self.document_iterator: Iterator[Element] = self._parse_document(file_name)
-        self.current_heading_name: str | None = None
-
     def run(self) -> SpecObject:
         """
         Parses the markdown spec file and returns the SpecObject.

From c92d629e3b00d39690829db902b8a0ee9f22e3bb Mon Sep 17 00:00:00 2001
From: Leo Lara <leo@leolara.me>
Date: Wed, 28 May 2025 10:54:29 +0000
Subject: [PATCH 18/18] Remove old implementation

---
 pyproject.toml |   1 -
 setup.py       | 402 +------------------------------------------------
 2 files changed, 6 insertions(+), 397 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c51111f779..83d5a46ddb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,6 @@ requires = [
   "ruamel.yaml==0.18.10",
   "setuptools==80.7.1",
   "wheel==0.45.1",
-  "deepdiff==8.5.0",
 ]
 
 [project]
diff --git a/setup.py b/setup.py
index 5db4ab1858..2f52ccddee 100644
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,6 @@
-import ast
 import copy
-import json
 import logging
 import os
-import re
 import string
 import sys
 import warnings
@@ -20,8 +17,8 @@
 from ruamel.yaml import YAML
 from setuptools import setup, find_packages, Command
 from setuptools.command.build_py import build_py
-from typing import Dict, List, Sequence, Optional, Tuple
-from deepdiff import DeepDiff
+from typing import Dict, List, Sequence, Optional, Tuple, cast
+from pysetup.md_to_spec import MarkdownToSpec
 
 pysetup_path = os.path.abspath(os.path.dirname(__file__))
 sys.path.insert(0, pysetup_path)
@@ -43,11 +40,8 @@
 )
 from pysetup.typing import (
     BuildTarget,
-    ProtocolDefinition,
     SpecObject,
-    VariableDefinition,
 )
-from pysetup.md_to_spec import MarkdownToSpec
 
 # Ignore '1.5.0-alpha.*' to '1.5.0a*' messages.
 warnings.filterwarnings('ignore', message='Normalizing .* to .*')
@@ -59,390 +53,9 @@ def filter(self, record):
 logging.getLogger().addFilter(PyspecFilter())
 
 
-@lru_cache(maxsize=None)
-def _get_name_from_heading(heading: Heading) -> Optional[str]:
-    last_child = heading.children[-1]
-    if isinstance(last_child, CodeSpan):
-        return last_child.children
-    return None
-
-
-@lru_cache(maxsize=None)
-def _get_source_from_code_block(block: FencedCode) -> str:
-    return block.children[0].children.strip()
-
-
-@lru_cache(maxsize=None)
-def _get_function_name_from_source(source: str) -> str:
-    fn = ast.parse(source).body[0]
-    return fn.name
-
-
-@lru_cache(maxsize=None)
-def _get_self_type_from_source(source: str) -> Optional[str]:
-    fn = ast.parse(source).body[0]
-    args = fn.args.args
-    if len(args) == 0:
-        return None
-    if args[0].arg != 'self':
-        return None
-    if args[0].annotation is None:
-        return None
-    return args[0].annotation.id
-
-
-@lru_cache(maxsize=None)
-def _get_class_info_from_source(source: str) -> Tuple[str, Optional[str]]:
-    class_def = ast.parse(source).body[0]
-    base = class_def.bases[0]
-    if isinstance(base, ast.Name):
-        parent_class = base.id
-    elif isinstance(base, ast.Subscript):
-        parent_class = base.value.id
-    else:
-        # NOTE: SSZ definition derives from earlier phase...
-        # e.g. `phase0.SignedBeaconBlock`
-        # TODO: check for consistency with other phases
-        parent_class = None
-    return class_def.name, parent_class
-
-
-@lru_cache(maxsize=None)
-def _is_constant_id(name: str) -> bool:
-    if name[0] not in string.ascii_uppercase + '_':
-        return False
-    return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:]))
-
-
-@lru_cache(maxsize=None)
-def _load_kzg_trusted_setups(preset_name):
-    trusted_setups_file_path = str(Path(__file__).parent) + '/presets/' + preset_name + '/trusted_setups/trusted_setup_4096.json'
-
-    with open(trusted_setups_file_path, 'r') as f:
-        json_data = json.load(f)
-        trusted_setup_G1_monomial = json_data['g1_monomial']
-        trusted_setup_G1_lagrange = json_data['g1_lagrange']
-        trusted_setup_G2_monomial = json_data['g2_monomial']
-
-    return trusted_setup_G1_monomial, trusted_setup_G1_lagrange, trusted_setup_G2_monomial
-
-@lru_cache(maxsize=None)
-def _load_curdleproofs_crs(preset_name):
-    """
-    NOTE: File generated from https://github.com/asn-d6/curdleproofs/blob/8e8bf6d4191fb6a844002f75666fb7009716319b/tests/crs.rs#L53-L67
-    """
-    file_path = str(Path(__file__).parent) + '/presets/' + preset_name + '/trusted_setups/curdleproofs_crs.json'
-
-    with open(file_path, 'r') as f:
-        json_data = json.load(f)
-
-    return json_data
-
-
-ALL_KZG_SETUPS = {
-    'minimal': _load_kzg_trusted_setups('minimal'),
-    'mainnet': _load_kzg_trusted_setups('mainnet')
-}
-
-ALL_CURDLEPROOFS_CRS = {
-    'minimal': _load_curdleproofs_crs('minimal'),
-    'mainnet': _load_curdleproofs_crs('mainnet'),
-}
-
-
-@lru_cache(maxsize=None)
-def _parse_value(name: str, typed_value: str, type_hint: Optional[str] = None) -> VariableDefinition:
-    comment = None
-    if name in ("ROOT_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_EXTENDED", "ROOTS_OF_UNITY_REDUCED"):
-        comment = "noqa: E501"
-
-    typed_value = typed_value.strip()
-    if '(' not in typed_value:
-        return VariableDefinition(type_name=None, value=typed_value, comment=comment, type_hint=type_hint)
-    i = typed_value.index('(')
-    type_name = typed_value[:i]
-
-    return VariableDefinition(type_name=type_name, value=typed_value[i+1:-1], comment=comment, type_hint=type_hint)
-
-
-def _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name):
-    comment = "noqa: E501"
-    kzg_setups = ALL_KZG_SETUPS[preset_name]
-    preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'] = VariableDefinition(
-        preset_dep_constant_vars['KZG_SETUP_G1_MONOMIAL'].value,
-        str(kzg_setups[0]),
-        comment, None
-    )
-    preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'] = VariableDefinition(
-        preset_dep_constant_vars['KZG_SETUP_G1_LAGRANGE'].value,
-        str(kzg_setups[1]),
-        comment, None
-    )
-    constant_vars['KZG_SETUP_G2_MONOMIAL'] = VariableDefinition(
-        constant_vars['KZG_SETUP_G2_MONOMIAL'].value,
-        str(kzg_setups[2]),
-        comment, None
-    )
-
-
-def _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_constant_vars, preset_name):
-    comment = "noqa: E501"
-    constant_vars['CURDLEPROOFS_CRS'] = VariableDefinition(
-        None,
-        'curdleproofs.CurdleproofsCrs.from_json(json.dumps(' + str(ALL_CURDLEPROOFS_CRS[str(preset_name)]).replace('0x', '') + '))',
-        comment, None
-    )
-
-
-@lru_cache(maxsize=None)
-def parse_markdown(content: str):
-    return gfm.parse(content)
-
-
-def check_yaml_matches_spec(var_name, yaml, value_def):
-    """
-    This function performs a sanity check for presets & configs. To a certain degree, it ensures
-    that the values in the specifications match those in the yaml files.
-    """
-    if var_name == "TERMINAL_BLOCK_HASH":
-        # This is just Hash32() in the specs, that's fine
-        return
-
-    # We use a var in the definition of a new var, replace usages
-    # Reverse sort so that overridden values come first
-    updated_value = value_def.value
-    for var in sorted(yaml.keys(), reverse=True):
-        if var in updated_value:
-            updated_value = updated_value.replace(var, yaml[var])
-    try:
-        assert yaml[var_name] == repr(eval(updated_value)), \
-            f"mismatch for {var_name}: {yaml[var_name]} vs {eval(updated_value)}"
-    except NameError:
-        # Okay it's probably something more serious, let's ignore
-        pass
-
-def get_spec_new(file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name=str) -> SpecObject:
+def get_spec(file_name: Path, preset: Dict[str, str], config: Dict[str, str | list[dict[str, str]]], preset_name: str) -> SpecObject:
     return MarkdownToSpec(file_name, preset, config, preset_name).run()
 
-def get_spec(file_name: Path, preset: Dict[str, str], config: Dict[str, str], preset_name=str) -> SpecObject:
-    functions: Dict[str, str] = {}
-    protocols: Dict[str, ProtocolDefinition] = {}
-    constant_vars: Dict[str, VariableDefinition] = {}
-    preset_dep_constant_vars: Dict[str, VariableDefinition] = {}
-    preset_vars: Dict[str, VariableDefinition] = {}
-    config_vars: Dict[str, VariableDefinition] = {}
-    ssz_dep_constants: Dict[str, str] = {}
-    func_dep_presets: Dict[str, str] = {}
-    ssz_objects: Dict[str, str] = {}
-    dataclasses: Dict[str, str] = {}
-    all_custom_types: Dict[str, str] = {}
-
-    with open(file_name) as source_file:
-        document = parse_markdown(source_file.read())
-
-    current_name = None
-    should_skip = False
-    list_of_records = None
-    list_of_records_name = None
-    for child in document.children:
-        if isinstance(child, BlankLine):
-            continue
-        if should_skip:
-            should_skip = False
-            continue
-        if isinstance(child, Heading):
-            current_name = _get_name_from_heading(child)
-        elif isinstance(child, FencedCode):
-            if child.lang != "python":
-                continue
-            source = _get_source_from_code_block(child)
-            if source.startswith("def"):
-                current_name = _get_function_name_from_source(source)
-                self_type_name = _get_self_type_from_source(source)
-                function_def = "\n".join(line.rstrip() for line in source.splitlines())
-                if self_type_name is None:
-                    functions[current_name] = function_def
-                else:
-                    if self_type_name not in protocols:
-                        protocols[self_type_name] = ProtocolDefinition(functions={})
-                    protocols[self_type_name].functions[current_name] = function_def
-            elif source.startswith("@dataclass"):
-                dataclasses[ast.parse(source).body[0].name] = "\n".join(line.rstrip() for line in source.splitlines())
-            elif source.startswith("class"):
-                class_name, parent_class = _get_class_info_from_source(source)
-                # check consistency with spec
-                try:
-                    assert class_name == current_name
-                except Exception:
-                    print('class_name', class_name)
-                    print('current_name', current_name)
-                    raise
-
-                if parent_class:
-                    assert parent_class == "Container"
-                # NOTE: trim whitespace from spec
-                ssz_objects[current_name] = "\n".join(line.rstrip() for line in source.splitlines())
-            else:
-                raise Exception("unrecognized python code element: " + source)
-        elif isinstance(child, Table) and list_of_records is not None:
-            list_of_records_header = None
-            for i, row in enumerate(child.children):
-                # This will start as an empty list when there is a <!-- list-of-records --> comment,
-                # which indicates that the next table is a list-of-records. After we're done parsing
-                # the table, we will reset this to None.
-                if list_of_records is not None:
-                    if i == 0:
-                        # Save the table header, this will be used for field names
-                        # Skip the last item, which is the description
-                        list_of_records_header = [
-                            # Convert the titles to SNAKE_CASE
-                            re.sub(r'\s+', '_', value.children[0].children.upper())
-                            for value in row.children[:-1]
-                        ]
-                    else:
-                        # Add the row entry to our list of records
-                        list_of_records.append({
-                            list_of_records_header[i]: value.children[0].children
-                            for i, value in enumerate(row.children[:-1])
-                        })
-
-            # Make a type map from the spec definition
-            # We'll apply this to the file config (ie mainnet.yaml)
-            type_map: dict[str,str] = {}
-            pattern = re.compile(r'^(\w+)\(.*\)$')
-            for entry in list_of_records:
-                for k, v in entry.items():
-                    m = pattern.match(v)
-                    if m:
-                        type_map[k] = m.group(1)
-
-            # Apply the types to the file config
-            list_of_records_config: list[dict[str,str]] = []
-            for entry in config[list_of_records_name]:
-                new_entry: dict[str,str] = {}
-                for k, v in entry.items():
-                    ctor = type_map.get(k)
-                    if ctor:
-                        new_entry[k] = f"{ctor}({v})"
-                    else:
-                        new_entry[k] = v
-                list_of_records_config.append(new_entry)
-
-            # For mainnet, check that the spec config & file config are the same
-            # For minimal, we expect this to be different; just use the file config
-            if preset_name == "mainnet":
-                assert list_of_records == list_of_records_config, \
-                    f"list of records mismatch: {list_of_records} vs {list_of_records_config}"
-            elif preset_name == "minimal":
-                list_of_records = list_of_records_config
-
-            # Set the config variable and reset the global variable
-            config_vars[list_of_records_name] = list_of_records
-            list_of_records = None
-
-        elif isinstance(child, Table):
-            for row in child.children:
-                cells = row.children
-                if len(cells) >= 2:
-                    name_cell = cells[0]
-                    name = name_cell.children[0].children
-
-                    value_cell = cells[1]
-                    value = value_cell.children[0].children
-
-                    description = None
-                    if len(cells) >= 3:
-                        description_cell = cells[2]
-                        if len(description_cell.children) > 0:
-                            description = description_cell.children[0].children
-                            if isinstance(description, list):
-                                # marko parses `**X**` as a list containing a X
-                                description = description[0].children
-
-                    if isinstance(name, list):
-                        # marko parses `[X]()` as a list containing a X
-                        name = name[0].children
-                    if isinstance(value, list):
-                        # marko parses `**X**` as a list containing a X
-                        value = value[0].children
-
-                    # Skip types that have been defined elsewhere
-                    if description is not None and description.startswith("<!-- predefined-type -->"):
-                        continue
-
-                    if not _is_constant_id(name):
-                        # Check for short type declarations
-                        if value.startswith(("uint", "Bytes", "ByteList", "Union", "Vector", "List", "ByteVector")):
-                            all_custom_types[name] = value
-                        continue
-
-                    if value.startswith("get_generalized_index"):
-                        ssz_dep_constants[name] = value
-                        continue
-
-                    if description is not None and description.startswith("<!-- predefined -->"):
-                        func_dep_presets[name] = value
-
-                    value_def = _parse_value(name, value)
-                    if name in preset:
-                        if preset_name == "mainnet":
-                            check_yaml_matches_spec(name, preset, value_def)
-                        preset_vars[name] = VariableDefinition(value_def.type_name, preset[name], value_def.comment, None)
-                    elif name in config:
-                        if preset_name == "mainnet":
-                            check_yaml_matches_spec(name, config, value_def)
-                        config_vars[name] = VariableDefinition(value_def.type_name, config[name], value_def.comment, None)
-                    else:
-                        if name in ('ENDIANNESS', 'KZG_ENDIANNESS'):
-                            # Deal with mypy Literal typing check
-                            value_def = _parse_value(name, value, type_hint='Final')
-                        if any(k in value for k in preset) or any(k in value for k in preset_dep_constant_vars):
-                            preset_dep_constant_vars[name] = value_def
-                        else:
-                            constant_vars[name] = value_def
-
-        elif isinstance(child, HTMLBlock):
-            if child.body.strip() == "<!-- eth2spec: skip -->":
-                should_skip = True
-            # Handle list-of-records tables
-            match = re.match(r"<!--\s*list-of-records:([a-zA-Z0-9_-]+)\s*-->", child.body.strip())
-            if match:
-                # Initialize list-of-records, in the next iteration this will indicate that the
-                # table is a list-of-records and must be parsed differently.
-                list_of_records = []
-                # Use regex to extract the desired configuration list name
-                list_of_records_name = match.group(1).upper()
-
-    # Load KZG trusted setup from files
-    if any('KZG_SETUP' in name for name in constant_vars):
-        _update_constant_vars_with_kzg_setups(constant_vars, preset_dep_constant_vars, preset_name)
-
-    if any('CURDLEPROOFS_CRS' in name for name in constant_vars):
-        _update_constant_vars_with_curdleproofs_crs(constant_vars, preset_dep_constant_vars, preset_name)
-
-    custom_types: Dict[str, str] = {}
-    preset_dep_custom_types: Dict[str, str] = {}
-    for name, value in all_custom_types.items():
-        if any(k in value for k in preset) or any(k in value for k in preset_dep_constant_vars):
-            preset_dep_custom_types[name] = value
-        else:
-            custom_types[name] = value
-
-    return SpecObject(
-        functions=functions,
-        protocols=protocols,
-        custom_types=custom_types,
-        preset_dep_custom_types=preset_dep_custom_types,
-        constant_vars=constant_vars,
-        preset_dep_constant_vars=preset_dep_constant_vars,
-        preset_vars=preset_vars,
-        config_vars=config_vars,
-        ssz_dep_constants=ssz_dep_constants,
-        func_dep_presets=func_dep_presets,
-        ssz_objects=ssz_objects,
-        dataclasses=dataclasses,
-    )
-
 
 @lru_cache(maxsize=None)
 def load_preset(preset_files: Sequence[Path]) -> Dict[str, str]:
@@ -460,11 +73,11 @@ def load_preset(preset_files: Sequence[Path]) -> Dict[str, str]:
             raise Exception(f"duplicate config var(s) in preset files: {', '.join(duplicates)}")
         preset.update(fork_preset)
     assert preset != {}
-    return parse_config_vars(preset)
+    return cast(Dict[str, str], parse_config_vars(preset))
 
 
 @lru_cache(maxsize=None)
-def load_config(config_path: Path) -> Dict[str, str]:
+def load_config(config_path: Path) -> Dict[str, str | List[Dict[str, str]]]:
     """
     Loads the given configuration file.
     """
@@ -480,10 +93,7 @@ def build_spec(fork: str,
                config_file: Path) -> str:
     preset = load_preset(tuple(preset_files))
     config = load_config(config_file)
-    all_specs = [get_spec_new(spec, preset, config, preset_name) for spec in source_files]
-    all_specs_old = [get_spec(spec, preset, config, preset_name) for spec in source_files]
-
-    assert DeepDiff(all_specs, all_specs_old, ignore_order=True) == {}, f"specs differ: {DeepDiff(all_specs, all_specs_old, ignore_order=True)}"
+    all_specs = [get_spec(spec, preset, config, preset_name) for spec in source_files]
 
     spec_object = all_specs[0]
     for value in all_specs[1:]: