diff --git a/.circleci/config.yml b/.circleci/config.yml index 16992d0f88..67b894f2ba 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -68,37 +68,6 @@ jobs: name: Run Tests command: zig-out/bin/test 2>&1 | cat - build_linux_release: - executor: linux-executor - steps: - - checkout - - attach_workspace: - at: workspace - - restore_cache: - key: linux-x86_64-0.15.2-{{ checksum "build.zig" }}-{{ checksum "build.zig.zon" }}-selfhosted-release - - run: - name: Build - command: | - sudo apt-get update -y && sudo apt-get install wget -y - ./scripts/proxy_workaround.sh workspace/zig/zig - workspace/zig/zig build sig fuzz -Dno-run -Denable-tsan=false -Doptimize=ReleaseSafe -Dcpu=x86_64_v3 -p workspace/zig-out-release --summary all - - run: - name: Build Conformance - command: | - cd conformance - ../workspace/zig/zig build -Doptimize=ReleaseSafe -Dcpu=x86_64_v3 -p ../workspace/conformance-release --summary all - - save_cache: - key: linux-x86_64-0.15.2-{{ checksum "build.zig" }}-{{ checksum "build.zig.zon" }}-selfhosted-release - paths: - - .zig-cache - - ~/.cache/zig - - persist_to_workspace: - root: workspace - paths: - - "zig-out-release/bin/sig" - - "zig-out-release/bin/fuzz" - - "conformance-release/lib/libsolfuzz_sig.so" - build_check_macos: executor: macos-executor steps: @@ -242,6 +211,19 @@ jobs: - checkout - attach_workspace: at: workspace + - restore_cache: + key: linux-x86_64-0.15.2-{{ checksum "build.zig" }}-{{ checksum "build.zig.zon" }}-selfhosted-release + - run: + name: Build + command: | + sudo apt-get update -y && sudo apt-get install wget -y + ./scripts/proxy_workaround.sh workspace/zig/zig + workspace/zig/zig build sig fuzz -Dno-run -Denable-tsan=false -Doptimize=ReleaseSafe -Dcpu=x86_64_v3 -p workspace/zig-out-release --summary all + - save_cache: + key: linux-x86_64-0.15.2-{{ checksum "build.zig" }}-{{ checksum "build.zig.zon" }}-selfhosted-release + paths: + - .zig-cache + - ~/.cache/zig - run: name: Run Gossip command: bash scripts/gossip_test.sh 120 workspace/zig-out-release/bin/sig @@ -258,6 +240,29 @@ jobs: name: Run Ledger Fuzzer command: workspace/zig-out-release/bin/fuzz --seed 19 ledger 10000 + build_conformance: + executor: linux-executor + steps: + - checkout + - attach_workspace: + at: workspace + - restore_cache: + key: build_conformance-v1-{{ checksum "build.zig" }}-{{ checksum "build.zig.zon" }}-{{ checksum "conformance/build.zig" }}-{{ checksum "conformance/build.zig.zon" }} + - run: + name: Build Conformance + command: | + cd conformance + ../workspace/zig/zig build -p ../workspace/conformance-release --summary all + - save_cache: + key: build_conformance-v1-{{ checksum "build.zig" }}-{{ checksum "build.zig.zon" }}-{{ checksum "conformance/build.zig" }}-{{ checksum "conformance/build.zig.zon" }} + paths: + - .zig-cache + - ~/.cache/zig + - persist_to_workspace: + root: workspace + paths: + - "conformance-release/lib/libsolfuzz_sig.so" + solana_conformance: executor: linux-executor # We need a pretty high parallelism since the vm_interp harness has a *lot* of fixtures. 
@@ -267,11 +272,35 @@ jobs:
     - checkout
     - attach_workspace:
        at: workspace
+    - run:
+        name: Prepare /nix permissions so cache restore works
+        command: |
+          sudo mkdir -p /nix
+          sudo chown "$USER" /nix
     - restore_cache:
-        key: linux-x86_64-0.15.2-{{ checksum "build.zig" }}-{{ checksum "build.zig.zon" }}-selfhosted-release
+        key: nix-v1-{{ checksum "conformance/flake.nix" }}-{{ checksum "conformance/flake.lock" }}-{{ checksum "conformance/commits.env" }}
+    - run:
+        name: Install nix
+        command: |
+          sh <(curl --proto '=https' --tlsv1.2 -L https://nixos.org/nix/install) --no-daemon
+          mkdir -p ~/.config/nix
+          echo 'experimental-features = nix-command flakes' > ~/.config/nix/nix.conf
+          echo '. /home/circleci/.nix-profile/etc/profile.d/nix.sh' >> "$BASH_ENV"
     - run:
         name: Run Fixtures
-        command: ./conformance/scripts/ci-run.sh
+        command: |
+          cd conformance
+          nix develop \
+            -s SPLIT_TESTS true \
+            -s PREBUILT_LIB_DIR ../workspace/conformance-release/lib \
+            -c ./scripts/ci-run.sh
+    - save_cache:
+        key: nix-v1-{{ checksum "conformance/flake.nix" }}-{{ checksum "conformance/flake.lock" }}-{{ checksum "conformance/commits.env" }}
+        paths:
+          - /nix/store
+          - /nix/var/nix/db
+          - /nix/var/nix/profiles
+          - ~/.cache/nix

   linux_misc_checks:
     executor: linux-executor
@@ -301,9 +330,6 @@ jobs:
     - run:
         name: Check docs
         command: python3 docs/check.py ./
-    - run:
-        name: Check conformance commits
-        command: conformance/scripts/verify-commits.sh
     - run:
         name: V2 checks
         command: |
@@ -333,7 +359,7 @@ workflows:
       - test_kcov_linux:
          requires:
            - setup_zig_linux
-      - build_linux_release:
+      - gossip_and_fuzz:
          requires:
            - setup_zig_linux
      - build_and_test_linux:
          requires:
@@ -342,12 +368,12 @@
      - linux_misc_checks:
          requires:
            - setup_zig_linux
-      - solana_conformance:
+      - build_conformance:
          requires:
-            - build_linux_release
-      - gossip_and_fuzz:
+            - setup_zig_linux
+      - solana_conformance:
          requires:
-            - build_linux_release
+            - build_conformance

   check_macos:
     jobs:
diff --git a/conformance/README.md b/conformance/README.md
index 2d3cf7d049..407f63c68a 100644
--- a/conformance/README.md
+++ b/conformance/README.md
@@ -1,24 +1,83 @@
-Dependencies:
-- python 3.11
-- cargo/rust
-- zig 0.15.2
-- git 2.49
-- cmake
+# Dependencies
+
+Install [nix](https://nixos.org/download/) if you do not have it.
+
+```bash
+nix develop         # if you only want to run the tests
+nix develop .#agave # if you also want to run the test vectors against agave
+```
+
+# Build
+
+To run the conformance tests, you'll need a build of solfuzz_sig. Either Debug or ReleaseSafe builds are fine.

 ```bash
-# set up the test environment
-scripts/setup-env.sh
-source env/pyvenv/bin/activate
+zig build solfuzz_sig
+```

-# compile the sig binary to test
-zig build -Doptimize=ReleaseSafe solfuzz_sig

-# run the conformance tests for the first time
-./run.py --create
+
+# Run

-# re-run conformance tests without re-creating the fixtures from scratch
-./run.py
+**Run the test vectors**

-# for more options
-./run.py --help
+```bash
+# run all conformance test vectors
+run
+
+# run only the transaction harness tests
+run txn
+```
+
+For customization, try `--help`.
+
+**Run the conformance CI job**
+
+```bash
+scripts/ci-run.sh
+```
+
+**Run solana-conformance directly**
+
+run.py is a helper script to make this process easier. But in some cases when debugging, it's useful to interact directly with solana-conformance.
+Here's an example of how to run the zk_sdk tests with solana-conformance:
+
+```bash
+solana-conformance \
+    exec-fixtures \
+    -i env/test-vectors/instr/fixtures/zk_sdk \
+    -t zig-out/lib/libsolfuzz_sig.so \
+    -o env/test-outputs/
 ```
+
+**Debug Agave**
+
+If you want to debug agave or manually generate fixtures from agave, you'll need to set up the agave environment with `nix develop .#agave`.
+
+You can edit any of the agave code in the env/ folder to debug agave. To run the conformance tests against this code, you'll need to compile solfuzz_agave:
+
+```bash
+cd env/solfuzz-agave
+cargo build --lib --release
+```
+
+Then you can use commands like these:
+
+```bash
+# run the tests against agave
+run --exec-lib env/solfuzz-agave/target/release/libsolfuzz_agave.so
+
+# create the fixtures based on agave, and run the conformance tests against sig
+run --create
+
+# re-run conformance tests against sig using your created fixtures
+run --use-created
+```
+
+# Interpreting results
+
+The expected and actual results for all tests are in env/test-outputs. You can use `parseout` to interpret the results.
+
+```bash
+parseout txn
+```
+
+See [parseout/README.md](parseout/README.md) for more info.
diff --git a/conformance/build.zig b/conformance/build.zig
index 86d4aa7f16..89e4d6cc6c 100644
--- a/conformance/build.zig
+++ b/conformance/build.zig
@@ -15,7 +15,7 @@ pub fn build(b: *Build) void {
     const solfuzz_sig_step = b.step("solfuzz_sig", "The solfuzz sig library.");
     const test_step = b.step("test", "Run unit tests");

-    // current commit: 3dea3808eab107f3010ddd3bbc24a31cc4b07c40
+    // current commit in commits.env
     const proto_step = b.step(
         "protobuf",
         "Re-generate protobuf definitions based on the `protosol` directory." ++
@@ -91,7 +91,6 @@ pub fn build(b: *Build) void {
     const protoc_run = pb.RunProtocStep.create(pb_dep.builder, target, .{
         .destination_directory = b.path("src/proto"),
         .source_files = &.{
-            "protosol/proto/elf.proto",
             "protosol/proto/vm.proto",
             "protosol/proto/shred.proto",
             "protosol/proto/txn.proto",
diff --git a/conformance/commits.env b/conformance/commits.env
index fdc3feadd7..3ab2c13e9f 100644
--- a/conformance/commits.env
+++ b/conformance/commits.env
@@ -1,11 +1,16 @@
 # The commits currently used in CI for sig's conformance with agave.
-SOLANA_CONFORMANCE_COMMIT=70b5ee3a73c3e12eb0c2f2bfd3037e5645903c94
-TEST_VECTORS_COMMIT=0a97cc29937eaa6b31e27448f2c43adf758f021c
-SOLFUZZ_AGAVE_COMMIT=3e12a4b269083d16b918bc8885c89fa6f6b556d7
+SOLANA_CONFORMANCE_COMMIT="786798b9536f310cd6fe8ba8a05df0a187500cd2"
+TEST_VECTORS_COMMIT="d6b81e3de1132f5032aabcd8a975e306634ae13d"
+SOLFUZZ_AGAVE_COMMIT="51c1f5c8bc38748c57aa9fb4b51ce9501111c010"

 # protosol definitions needed for the above commit of solfuzz-agave
-AGAVE_PROTOSOL_COMMIT=3dea3808eab107f3010ddd3bbc24a31cc4b07c40
+AGAVE_PROTOSOL_COMMIT="e52ca0f21f4ef68e8f2ed5269dcfaa8b50091f7f"

 # protosol definitions used for sig
-SIG_PROTOSOL_COMMIT=3dea3808eab107f3010ddd3bbc24a31cc4b07c40
+SIG_PROTOSOL_COMMIT="e52ca0f21f4ef68e8f2ed5269dcfaa8b50091f7f"
+
+# these are the commits used by solfuzz_agave.
+# they are not needed directly by our CI, but are useful for local debugging
+AGAVE_COMMIT="8d35051b7cac869bd165ae440db672617d053026"
+SBPF_COMMIT="d32556248756e19ee49a44a94ccb633e2414a386"
diff --git a/conformance/flake.lock b/conformance/flake.lock
new file mode 100644
index 0000000000..4cd99e2c8b
--- /dev/null
+++ b/conformance/flake.lock
@@ -0,0 +1,117 @@
+{
+  "nodes": {
+    "flake-compat": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1696426674,
+        "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1773734432,
+        "narHash": "sha256-IF5ppUWh6gHGHYDbtVUyhwy/i7D261P7fWD1bPefOsw=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "cda48547b432e8d3b18b4180ba07473762ec8558",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "nixpkgs_2": {
+      "locked": {
+        "lastModified": 1771043024,
+        "narHash": "sha256-O1XDr7EWbRp+kHrNNgLWgIrB0/US5wvw9K6RERWAj6I=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "3aadb7ca9eac2891d52a9dec199d9580a6e2bf44",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-25.11",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "nixpkgs": "nixpkgs",
+        "rust-overlay": "rust-overlay",
+        "zig-overlay": "zig-overlay"
+      }
+    },
+    "rust-overlay": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1773803479,
+        "narHash": "sha256-GD6i1F2vrSxbsmbS92+8+x3DbHOJ+yrS78Pm4xigW4M=",
+        "owner": "oxalica",
+        "repo": "rust-overlay",
+        "rev": "f17186f52e82ec5cf40920b58eac63b78692ac7c",
+        "type": "github"
+      },
+      "original": {
+        "owner": "oxalica",
+        "repo": "rust-overlay",
+        "type": "github"
+      }
+    },
+    "systems": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "zig-overlay": {
+      "inputs": {
+        "flake-compat": "flake-compat",
+        "nixpkgs": "nixpkgs_2",
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1773622330,
+        "narHash": "sha256-eaDMObc4Y+b4nIHMf+2pmS+gcQEnRdY7xU62K0G7wzM=",
+        "owner": "mitchellh",
+        "repo": "zig-overlay",
+        "rev": "a20da6ffe5e036abaadc8b0cbe0c21aedf0a1dc5",
+        "type": "github"
+      },
+      "original": {
+        "owner": "mitchellh",
+        "repo": "zig-overlay",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/conformance/flake.nix b/conformance/flake.nix
new file mode 100644
index 0000000000..41b2ac8060
--- /dev/null
+++ b/conformance/flake.nix
@@ -0,0 +1,117 @@
+{
+  description = "Sig Conformance Testing";
+
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+    zig-overlay.url = "github:mitchellh/zig-overlay";
+    rust-overlay = {
+      url = "github:oxalica/rust-overlay";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+  };
+
+  outputs = {
+    self,
+    nixpkgs,
+    zig-overlay,
+    rust-overlay,
+  }: let
+    system = "x86_64-linux";
+    pkgs = import nixpkgs {
+      inherit system;
+      overlays = [rust-overlay.overlays.default zig-overlay.overlays.default];
+    };
+    baseDeps = with pkgs; [zigpkgs."0.15.2" python313
git alejandra]; + commits = builtins.fromTOML (builtins.readFile ./commits.env); + + test-vectors = builtins.fetchGit { + url = "https://github.com/firedancer-io/test-vectors.git"; + rev = commits.TEST_VECTORS_COMMIT; + }; + solana-conformance = builtins.fetchGit { + url = "https://github.com/firedancer-io/solana-conformance.git"; + rev = commits.SOLANA_CONFORMANCE_COMMIT; + }; + solfuzz-agave = builtins.fetchGit { + url = "https://github.com/firedancer-io/solfuzz-agave.git"; + rev = commits.SOLFUZZ_AGAVE_COMMIT; + }; + protosol = builtins.fetchGit { + url = "https://github.com/firedancer-io/protosol.git"; + rev = commits.AGAVE_PROTOSOL_COMMIT; + submodules = true; + }; + agave = builtins.fetchGit { + url = "https://github.com/firedancer-io/agave.git"; + rev = commits.AGAVE_COMMIT; + }; + sbpf = builtins.fetchGit { + url = "https://github.com/firedancer-io/sbpf.git"; + rev = commits.SBPF_COMMIT; + }; + + # protoc and flatc need to be the exact versions built by protosol + protosol-toolchain = pkgs.stdenvNoCC.mkDerivation { + pname = "protosol-toolchain"; + version = commits.AGAVE_PROTOSOL_COMMIT; + src = protosol; + nativeBuildInputs = with pkgs; [ cmake gcc ]; + dontConfigure = true; + buildPhase = '' + patchShebangs ./deps.sh + ./deps.sh + ''; + installPhase = '' + mkdir -p "$out" + cp -r opt/bin/* "$out" + ''; + }; + + baseShellHook = '' + export LD_LIBRARY_PATH="${pkgs.lib.makeLibraryPath [pkgs.stdenv.cc.cc.lib]}" + mkdir -p env + ln -sfn ${test-vectors} env/test-vectors + + python3.13 -m venv env/venv + source env/venv/bin/activate + ln -sfn "$PWD/run.py" env/venv/bin/run + cp -r ${solana-conformance} env/solana-conformance && chmod +w -R env/solana-conformance + export SETUPTOOLS_SCM_PRETEND_VERSION=0.0.0 + pip install -e env/solana-conformance[dev,octane] + pip install -e parseout[dev] + ''; + in { + formatter.${system} = pkgs.alejandra; + + devShells.${system} = { + default = pkgs.mkShell { + packages = baseDeps; + shellHook = baseShellHook; + }; + + agave = pkgs.mkShell { + packages = baseDeps ++ (with pkgs; [clang cmake gcc pkgs.rust-bin.stable."1.93.0".default]); + shellHook = baseShellHook + '' + export LIBCLANG_PATH="${pkgs.llvmPackages_22.libclang.lib}/lib" + export LD_LIBRARY_PATH="${pkgs.lib.makeLibraryPath [ + pkgs.stdenv.cc.cc.lib + pkgs.llvmPackages_22.libclang.lib + pkgs.llvmPackages_22.libllvm + ]}" + + export PROTOC_EXECUTABLE="${protosol-toolchain}/protoc" + export FLATC_EXECUTABLE="${protosol-toolchain}/flatc" + + [ ! -d env/agave ] && cp -r ${agave} env/agave && chmod +w -R env/agave + [ ! -d env/sbpf ] && cp -r ${sbpf} env/sbpf && chmod +w -R env/sbpf + [ ! -d env/solfuzz-agave ] && cp -r ${solfuzz-agave} env/solfuzz-agave && chmod +w -R env/solfuzz-agave + [ ! -d env/solfuzz-agave/protosol ] && cp -r ${protosol} env/solfuzz-agave/protosol && chmod +w -R env/solfuzz-agave/protosol + + pushd env/solfuzz-agave + python scripts/generate_local_cargo.py --agave-path ../agave --sbpf-path ../sbpf + popd + ''; + }; + }; + }; +} diff --git a/conformance/parseout/.gitignore b/conformance/parseout/.gitignore new file mode 100644 index 0000000000..b943e2c69c --- /dev/null +++ b/conformance/parseout/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +*.pyc +.pytest_cache diff --git a/conformance/parseout/README.md b/conformance/parseout/README.md new file mode 100644 index 0000000000..63d5b32f67 --- /dev/null +++ b/conformance/parseout/README.md @@ -0,0 +1,215 @@ +# parseout + +A parser and differ for the conformance test outputs. 
Useful for categorizing or
+interpreting a large number of failed tests. Compares expected and actual
+results, identifies mismatches, and categorizes them based on the types of
+mismatches.
+
+## Quick start
+
+```bash
+parseout diff expected.txt actual.txt
+```
+
+Or, for a smarter version that handles transaction harness failures better than `diff`:
+
+```bash
+parseout txn
+```
+
+This produces three output files and prints a summary
+to stdout:
+
+- **\*.csv:** one row per mismatch (`header, categories`), sorted so rows
+  with the same category combination are adjacent.
+- **\*-category.json:** mismatches grouped by individual category. A
+  mismatch appears under every category it belongs to.
+- **\*-combo.json:** mismatches grouped by their exact combination of
+  categories. Each mismatch appears under exactly one key.
+
+### Example stdout summary
+
+```
+1818 mismatches written to txn.csv, txn-category.json, txn-combo.json
+
+Categories:
+  result_type: 1194
+  status: 233
+  instruction_error: 312
+  ...
+
+Combinations:
+  result_type: 1194
+  modified_accounts+rollback_accounts: 185
+  instruction_error+status: 155
+  ...
+```
+
+**Categories** lists every individual category with the total number of
+mismatches it appears in (a mismatch can be counted under multiple
+categories).
+
+**Combinations** lists every unique set of categories as a single entry,
+with the count of mismatches that have exactly that combination.
+
+## Subcommands
+
+### `diff` -- generic diff
+
+```bash
+parseout diff <expected> <actual>
+```
+
+Uses the generic parser only. Recursively compares every field in each record
+and generates diff categories dynamically from the field path where differences
+occur. Category naming:
+
+- `field.subfield` -- values differ at that path
+- `field.0.subfield` -- values differ at list index 0, subfield
+- `field-missing` -- field exists in expected but not actual
+- `field-unexpected` -- field exists in actual but not expected
+- `field` (for lists) -- list lengths differ
+
+Outputs `diff.csv`, `diff-category.json`, `diff-combo.json` to the current
+directory.
+
+### `txn` -- transaction diff
+
+```bash
+parseout txn [expected] [actual]
+```
+
+A domain-specific differ for transaction conformance outputs.
+
+The generic differ makes a best effort at detecting differences in fields, but it
+doesn't encode any domain-specific knowledge. For the transaction harness, the
+mismatch categories from `diff` can be confusing. That's where this `txn` command comes in handy.
+
+This parses both files into typed records with known fields (status, fee_details,
+modified_accounts, etc.) and compares them using a fixed set of categories
+tailored to transaction semantics.
+
+`<expected>` and `<actual>` are optional. If omitted, it will locate files in
+`env/test-outputs/txn/fixtures/` and place the outputs there as well.
+
+Outputs `txn.csv`, `txn-category.json`, `txn-combo.json`.
+
+### `parse` -- generic parse
+
+```bash
+parseout parse <input-file>
+```
+
+Converts a single test output file from the protobuf text format into JSON
+using the generic parser. Prints to stdout. Useful if you'd like to interpret
+the results in a separate program that is able to parse JSON.
+
+## Architecture
+
+The parser is split into two layers. The layers are composable and you can import
+them as a library into Python scripts for custom interpretations of test results.
+
+There are also two differs that each operate on the two parsing layers.
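+
+As a minimal sketch (the file name and the `is_ok` query are illustrative), the
+generic layer alone is enough for quick ad-hoc queries over an output file:
+
+```python
+from parseout import parse_file
+
+records = parse_file("expected.txt")  # OrderedDict[str, Block]
+ok = sum(1 for rec in records.values() if rec.get("is_ok") is True)
+print(f"{ok} of {len(records)} records are marked is_ok")
+```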
+ +### Parse Layer 1: generic parser (`parseout.parser`) + +Parses the protobuf-text-like format into `OrderedDict[str, dict]`. Handles +record headers, key-value pairs, nested `{ }` blocks, and record separators +(lines of 20 hyphens). No regex -- uses only string operations. + +### Parse Layer 2: transaction parser (`parseout.transaction.parser`) + +Converts the generic dicts into a typed dataclass hierarchy: + +- **Record:** wraps a `test_id` and a result variant. +- **SanitizationError:** the transaction failed before execution. +- **ExecutedSuccess:** the transaction executed and succeeded. +- **ExecutedError:** the transaction executed but failed. +- **FeeDetails:** fee breakdown (transaction fee, prioritization fee). +- **AccountEntry:** a modified or rollback account. + +The result variant is a tagged union discriminated by the `sanitization_error`, +`executed`, and `is_ok` fields in the raw data. + +### Generic differ (`parseout.differ`) - Layer 1 diff + +Compares two parsed `OrderedDict[str, Block]` collections by shared headers. +Recursively walks every field and generates categories from the path where +values diverge. Works on any file the generic parser can read. + +### Transaction differ (`parseout.transaction.differ`) - Layer 2 diff + +Compares two parsed `OrderedDict[str, Record]` collections by shared test IDs. +Each mismatch is tagged with one or more `Category` values from a fixed enum: + +| Category | Description | +| --------------------------- | ----------------------------------------------- | +| `result_type` | Different variant types (e.g. success vs error) | +| `status` | Different status codes | +| `instruction_error` | Different instruction error codes | +| `instruction_error_index` | Different instruction error indices | +| `custom_error` | Different custom error codes | +| `fee_details` | Different fee breakdowns | +| `executed_units` | Different compute unit counts | +| `loaded_accounts_data_size` | Different loaded account data sizes | +| `return_data` | Different return data | +| `modified_accounts` | Different modified account lists | +| `rollback_accounts` | Different rollback account lists | + +When two records have different variant types, only `result_type` is reported. + +## Input format + +Each file contains records separated by lines of exactly 20 hyphens +(`--------------------`). Each record starts with a header line (the test ID) +followed by a colon, then key-value pairs and nested blocks: + +``` +some_test_id: +executed: true +is_ok: true +executed_units: 27829 +fee_details { + transaction_fee: 15000 + prioritization_fee: 500 +} +modified_accounts { + address: "8fi2Typkf4m1z9miGfZQGRXDimBTVQqWHciMA9aZGXpN" + lamports: 10733753813112760225 + owner: "11111111111111111111111111111111" +} +-------------------- +another_test_id: +sanitization_error: true +status: 9 +``` + +## Tests + +```bash +pytest +``` + +151 tests run inline in the source files (parser, generic differ, transaction +parser, transaction differ). 
+
+## Python API
+
+```python
+# Generic diff -- works on any file, dynamic categories
+from parseout import diff_files
+
+mismatches = diff_files("expected.txt", "actual.txt")
+for m in mismatches:
+    print(m.header, m.categories)
+
+# Transaction diff -- typed records, fixed categories
+from parseout.transaction import parse_file, diff
+
+expected = parse_file("expected.txt")
+actual = parse_file("actual.txt")
+
+mismatches = diff(expected, actual)
+for m in mismatches:
+    print(m.test_id, m.categories)
+```
diff --git a/conformance/parseout/parseout/__init__.py b/conformance/parseout/parseout/__init__.py
new file mode 100644
index 0000000000..f03fbb77ef
--- /dev/null
+++ b/conformance/parseout/parseout/__init__.py
@@ -0,0 +1,28 @@
+"""parseout - A parser for protobuf-text-like files.
+
+Modules:
+  - ``parseout.parser``: text -> ``OrderedDict[str, dict]`` (format-level)
+  - ``parseout.differ``: generic recursive diff on parsed dicts
+  - ``parseout.transaction``: typed dataclasses, parsing, and diffing
+
+Top-level ``parse`` / ``parse_file`` are the generic Layer 1 functions.
+Top-level ``diff`` / ``diff_files`` are the generic differ functions.
+Domain-specific modules live in subpackages (e.g. ``parseout.transaction``).
+"""
+
+from .parser import parse, parse_file, Block, Value
+from .differ import diff, diff_files, Mismatch
+
+# Make subpackage accessible as parseout.transaction
+from . import transaction
+
+__all__ = [
+    "parse",
+    "parse_file",
+    "Block",
+    "Value",
+    "diff",
+    "diff_files",
+    "Mismatch",
+    "transaction",
+]
diff --git a/conformance/parseout/parseout/__main__.py b/conformance/parseout/parseout/__main__.py
new file mode 100644
index 0000000000..e0d9aa7123
--- /dev/null
+++ b/conformance/parseout/parseout/__main__.py
@@ -0,0 +1,283 @@
+"""Command-line interface for parseout.
+ +Subcommands: + parse [input-file] — generic parse to JSON on stdout + diff [expected] [actual] — generic diff producing diff.csv, diff-category.json, diff-combo.json + txn [expected] [actual] — transaction diff producing txn.csv, txn-category.json, txn-combo.json +""" + +from __future__ import annotations + +import argparse +import csv +import json +import sys +from dataclasses import asdict +from glob import glob +from pathlib import Path + +from .parser import parse_file as _generic_parse_file +from .differ import diff_files as _generic_diff_files +from .transaction import diff_files as _txn_diff_files, Category, label_programs + + +_TXN_FIXTURES = Path("env/test-outputs/txn/fixtures") +_TXN_EXPECTED_GLOB = str(_TXN_FIXTURES / "expected" / "*.txt") +_TXN_ACTUAL_GLOB = str(_TXN_FIXTURES / "actual" / "*.txt") +_TXN_FIXTURE_INPUT_DIR = Path("env/test-vectors/txn/fixtures") + + +def main(argv: list[str] | None = None) -> None: + """Entry point for the CLI.""" + top = argparse.ArgumentParser(prog="parseout", description="parseout CLI") + subs = top.add_subparsers(dest="command") + + # --- parse --- + p_parse = subs.add_parser("parse", help="Generic parse to JSON on stdout") + p_parse.add_argument("input_file", help="Path to the input text file") + + # --- diff --- + p_diff = subs.add_parser("diff", help="Generic diff producing diff.csv, diff-category.json, diff-combo.json") + p_diff.add_argument("expected", help="Path to the expected file") + p_diff.add_argument("actual", help="Path to the actual file") + + # --- txn --- + p_txn = subs.add_parser("txn", help="Transaction diff producing txn.csv, txn-category.json, txn-combo.json") + p_txn.add_argument("expected", nargs="?", default=None, help="Path to the expected file (default: first match in env/test-outputs/txn/fixtures/expected/*.txt)") + p_txn.add_argument("actual", nargs="?", default=None, help="Path to the actual file (default: first match in env/test-outputs/txn/fixtures/actual/*.txt)") + p_txn.add_argument("--fixtures", default=None, metavar="DIR", + help="Directory containing .fix protobuf fixtures for program classification " + "(default: env/test-vectors/txn/fixtures when using default expected/actual)") + p_txn.add_argument("--no-fixtures", action="store_true", + help="Disable automatic fixture loading (skip program classification)") + + args = top.parse_args(argv) + + if args.command == "parse": + _cmd_parse(args) + elif args.command == "diff": + _cmd_diff(args) + elif args.command == "txn": + _cmd_txn(args) + else: + top.print_help() + sys.exit(1) + + +def _cmd_parse(args: argparse.Namespace) -> None: + """Generic parse: file -> JSON on stdout.""" + result = _generic_parse_file(args.input_file) + json.dump(result, sys.stdout, indent=2) + sys.stdout.write("\n") + + +def _cmd_diff(args: argparse.Namespace) -> None: + """Generic diff: two files -> diff.csv + diff-category.json + diff-combo.json.""" + out_dir = Path(".") + mismatches = _generic_diff_files(args.expected, args.actual) + + # --- diff.csv (sorted by combo so same-category rows are adjacent) --- + sorted_mismatches = sorted( + mismatches, + key=lambda m: "+".join(sorted(m.categories)), + ) + csv_path = out_dir / "diff.csv" + with open(csv_path, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["header", "categories"]) + for m in sorted_mismatches: + cats = sorted(m.categories) + writer.writerow([m.header, ",".join(cats)]) + + # --- diff-category.json --- + by_category: dict[str, list] = {} + for m in mismatches: + entry = _generic_mismatch_to_dict(m) 
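+        # Register this entry under every category it belongs to.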
+ for cat in m.categories: + by_category.setdefault(cat, []).append(entry) + + ordered = dict(sorted(by_category.items())) + + cat_path = out_dir / "diff-category.json" + with open(cat_path, "w") as f: + json.dump(ordered, f, indent=2) + f.write("\n") + + # --- diff-combo.json --- + from collections import Counter + + by_combo: dict[str, list] = {} + combo_counts: Counter[str] = Counter() + for m in mismatches: + key = " + ".join(sorted(m.categories)) + by_combo.setdefault(key, []).append(_generic_mismatch_to_dict(m)) + combo_counts[key] += 1 + + combo_ordered = {k: by_combo[k] for k, _ in combo_counts.most_common()} + + combo_path = out_dir / "diff-combo.json" + with open(combo_path, "w") as f: + json.dump(combo_ordered, f, indent=2) + f.write("\n") + + total = len(mismatches) + print(f"{total} mismatches written to {csv_path}, {cat_path}, {combo_path}") + + # --- per-category counts --- + print("\nCategories:") + for cat in sorted(by_category.keys()): + print(f" {cat}: {len(by_category[cat])}") + + # --- combination counts --- + print("\nCombinations:") + for combo, count in combo_counts.most_common(): + print(f" {combo}: {count}") + + +def _cmd_txn(args: argparse.Namespace) -> None: + """Transaction diff: two files -> txn.csv + txn-category.json + txn-combo.json.""" + use_fixtures = args.expected is None or args.actual is None + + expected = args.expected or _resolve_glob(_TXN_EXPECTED_GLOB, "expected") + actual = args.actual or _resolve_glob(_TXN_ACTUAL_GLOB, "actual") + + # When using fixture defaults, write output alongside the fixtures + out_dir = Path(_TXN_FIXTURES) if use_fixtures else Path(".") + out_dir.mkdir(parents=True, exist_ok=True) + + # Determine fixture directory for program classification + fixture_dir: str | Path | None = None + if not args.no_fixtures: + if args.fixtures: + fixture_dir = args.fixtures + elif use_fixtures and _TXN_FIXTURE_INPUT_DIR.is_dir(): + fixture_dir = _TXN_FIXTURE_INPUT_DIR + + has_programs = fixture_dir is not None + mismatches = _txn_diff_files(expected, actual, fixture_dir=fixture_dir) + + # --- txn.csv (sorted by combo so same-category rows are adjacent) --- + sorted_mismatches = sorted( + mismatches, + key=lambda m: "+".join(sorted(c.name for c in m.categories)), + ) + csv_path = out_dir / "txn.csv" + with open(csv_path, "w", newline="") as f: + writer = csv.writer(f) + header = ["test_id", "categories"] + if has_programs: + header.append("programs") + writer.writerow(header) + for m in sorted_mismatches: + cats = sorted(c.name for c in m.categories) + row: list[str] = [m.test_id, ",".join(cats)] + if has_programs: + row.append(",".join(label_programs(m.programs))) + writer.writerow(row) + + # --- txn-category.json --- + by_category: dict[str, list] = {} + for m in mismatches: + entry = _mismatch_to_dict(m, include_programs=has_programs) + for cat in m.categories: + by_category.setdefault(cat.name, []).append(entry) + + # Sort keys to match Category enum order + ordered = {c.name: by_category[c.name] for c in Category if c.name in by_category} + + cat_path = out_dir / "txn-category.json" + with open(cat_path, "w") as f: + json.dump(ordered, f, indent=2) + f.write("\n") + + # --- txn-combo.json --- + from collections import Counter + + by_combo: dict[str, list] = {} + combo_counts: Counter[str] = Counter() + for m in mismatches: + key = " + ".join(sorted(c.name for c in m.categories)) + by_combo.setdefault(key, []).append(_mismatch_to_dict(m, include_programs=has_programs)) + combo_counts[key] += 1 + + # Sort keys by frequency (most common 
first) + combo_ordered = {k: by_combo[k] for k, _ in combo_counts.most_common()} + + combo_path = out_dir / "txn-combo.json" + with open(combo_path, "w") as f: + json.dump(combo_ordered, f, indent=2) + f.write("\n") + + # --- txn-program.json (mismatches grouped by invoked program) --- + if has_programs: + by_program: dict[str, list] = {} + program_counts: Counter[str] = Counter() + for m in mismatches: + labels = label_programs(m.programs) if m.programs else ["(unknown)"] + entry = _mismatch_to_dict(m, include_programs=True) + for label in labels: + by_program.setdefault(label, []).append(entry) + program_counts[label] += 1 + + program_ordered = {k: by_program[k] for k, _ in program_counts.most_common()} + prog_path = out_dir / "txn-program.json" + with open(prog_path, "w") as f: + json.dump(program_ordered, f, indent=2) + f.write("\n") + + total = len(mismatches) + output_files = f"{csv_path}, {cat_path}, {combo_path}" + if has_programs: + output_files += f", {prog_path}" + print(f"{total} mismatches written to {output_files}") + + # --- per-category counts --- + print("\nCategories:") + for c in Category: + if c.name in by_category: + print(f" {c.name}: {len(by_category[c.name])}") + + # --- combination counts --- + print("\nCombinations:") + for combo, count in combo_counts.most_common(): + print(f" {combo}: {count}") + + # --- per-program counts --- + if has_programs: + print("\nPrograms:") + for prog, count in program_counts.most_common(): + print(f" {prog}: {count}") + + +def _resolve_glob(pattern: str, label: str) -> str: + """Resolve a glob pattern to a single file path, or exit with an error.""" + matches = sorted(glob(pattern)) + if not matches: + print(f"error: no {label} file found matching {pattern}. Are you in the conformance directory?", file=sys.stderr) + sys.exit(1) + return matches[0] + + +def _mismatch_to_dict(m, include_programs: bool = False) -> dict: + """Convert a transaction Mismatch to a JSON-serialisable dict.""" + d = asdict(m) + d["categories"] = sorted(c.name for c in m.categories) + if include_programs: + d["programs"] = label_programs(m.programs) + else: + del d["programs"] + return d + + +def _generic_mismatch_to_dict(m) -> dict: + """Convert a generic Mismatch to a JSON-serialisable dict.""" + return { + "header": m.header, + "categories": sorted(m.categories), + "left": m.left, + "right": m.right, + } + + +if __name__ == "__main__": + main() diff --git a/conformance/parseout/parseout/differ.py b/conformance/parseout/parseout/differ.py new file mode 100644 index 0000000000..7e688e6e38 --- /dev/null +++ b/conformance/parseout/parseout/differ.py @@ -0,0 +1,333 @@ +"""Generic differ: compare two parsed files and categorise mismatches. + +Built on top of ``parseout.parser``. Compares two +``OrderedDict[str, Block]`` collections by their shared headers +and tags every mismatch with dynamically generated category strings +based on the field path where differences occur. 
+ +Category naming: + - ``field.subfield`` — values differ at that path + - ``field.0.subfield`` — values differ at list index 0, subfield + - ``field-missing`` — field exists in expected but not actual + - ``field-unexpected`` — field exists in actual but not expected + - ``field`` (for lists) — list lengths differ + +Public API: + ``diff(a, b)`` — two OrderedDicts -> list[Mismatch] + ``diff_files(path_a, path_b)`` — two file paths -> list[Mismatch] +""" + +from __future__ import annotations + +from collections import OrderedDict +from dataclasses import dataclass, field +from pathlib import Path +from typing import Union + +from .parser import Block, Value, parse_file as _parse_file + + +@dataclass +class Mismatch: + """A single mismatched record between two parsed files. + + Attributes: + header: The shared record header. + categories: Set of category path strings describing what differs. + left: The Block from the first (expected) file. + right: The Block from the second (actual) file. + """ + + header: str + categories: set[str] = field(default_factory=set) + left: Block = field(default_factory=dict) + right: Block = field(default_factory=dict) + + +def diff( + a: OrderedDict[str, Block], + b: OrderedDict[str, Block], +) -> list[Mismatch]: + """Compare two parsed OrderedDicts and return categorised mismatches. + + Only records whose headers appear in *both* dicts are compared. + Records that match exactly are omitted from the result. + """ + result: list[Mismatch] = [] + for header in a: + if header not in b: + continue + left, right = a[header], b[header] + if left == right: + continue + cats = _compare(left, right, "") + result.append(Mismatch(header=header, categories=cats, left=left, right=right)) + return result + + +def diff_files(path_a: str | Path, path_b: str | Path) -> list[Mismatch]: + """Parse two files and return categorised mismatches.""" + return diff(_parse_file(path_a), _parse_file(path_b)) + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + +_Node = Union[Value, Block, list] + + +def _compare(left: _Node, right: _Node, path: str) -> set[str]: + """Recursively compare two parsed values and return category paths.""" + cats: set[str] = set() + + if isinstance(left, dict) and isinstance(right, dict): + left_keys = set(left.keys()) + right_keys = set(right.keys()) + + for key in left_keys - right_keys: + subpath = f"{path}.{key}" if path else key + cats.add(f"{subpath}-missing") + + for key in right_keys - left_keys: + subpath = f"{path}.{key}" if path else key + cats.add(f"{subpath}-unexpected") + + for key in left_keys & right_keys: + subpath = f"{path}.{key}" if path else key + cats.update(_compare(left[key], right[key], subpath)) + + elif isinstance(left, list) and isinstance(right, list): + if len(left) != len(right): + cats.add(path) + else: + for i, (lv, rv) in enumerate(zip(left, right)): + subpath = f"{path}.{i}" + cats.update(_compare(lv, rv, subpath)) + + else: + # Scalars or type mismatch (e.g., dict vs scalar). 
+ if left != right: + cats.add(path) + + return cats + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +import pytest # noqa: E402 + +from .parser import TestParse # noqa: E402 + + +class TestCompare: + """Tests for _compare.""" + + def test_identical_dicts(self): + assert _compare({"a": 1}, {"a": 1}, "") == set() + + def test_scalar_diff(self): + assert _compare({"a": 1}, {"a": 2}, "") == {"a"} + + def test_nested_dict_diff(self): + assert _compare({"o": {"i": 1}}, {"o": {"i": 2}}, "") == {"o.i"} + + def test_list_same_length_diff(self): + left = {"items": [{"n": 1}, {"n": 2}]} + right = {"items": [{"n": 1}, {"n": 3}]} + assert _compare(left, right, "") == {"items.1.n"} + + def test_list_length_mismatch(self): + left = {"items": [{"n": 1}]} + right = {"items": [{"n": 1}, {"n": 2}]} + assert _compare(left, right, "") == {"items"} + + def test_missing_field(self): + assert _compare({"a": 1, "b": 2}, {"a": 1}, "") == {"b-missing"} + + def test_unexpected_field(self): + assert _compare({"a": 1}, {"a": 1, "b": 2}, "") == {"b-unexpected"} + + def test_nested_missing(self): + assert _compare({"o": {"a": 1, "b": 2}}, {"o": {"a": 1}}, "") == {"o.b-missing"} + + def test_nested_unexpected(self): + assert _compare({"o": {"a": 1}}, {"o": {"a": 1, "b": 2}}, "") == {"o.b-unexpected"} + + def test_type_mismatch_dict_vs_scalar(self): + assert _compare({"a": {"x": 1}}, {"a": 42}, "") == {"a"} + + def test_multiple_diffs(self): + assert _compare({"a": 1, "b": 2, "c": 3}, {"a": 1, "b": 99, "c": 100}, "") == {"b", "c"} + + def test_deep_nesting(self): + left = {"a": {"b": {"c": {"d": 1}}}} + right = {"a": {"b": {"c": {"d": 2}}}} + assert _compare(left, right, "") == {"a.b.c.d"} + + def test_list_element_path(self): + left = {"items": [{"x": 1}, {"x": 2}, {"x": 3}]} + right = {"items": [{"x": 1}, {"x": 99}, {"x": 3}]} + assert _compare(left, right, "") == {"items.1.x"} + + def test_empty_dicts(self): + assert _compare({}, {}, "") == set() + + def test_both_empty_lists(self): + assert _compare({"a": []}, {"a": []}, "") == set() + + def test_list_vs_dict_type_mismatch(self): + assert _compare({"a": [1, 2]}, {"a": {"x": 1}}, "") == {"a"} + + def test_bool_diff(self): + assert _compare({"f": True}, {"f": False}, "") == {"f"} + + def test_string_diff(self): + assert _compare({"s": "hello"}, {"s": "world"}, "") == {"s"} + + def test_mixed_missing_and_diff(self): + left = {"a": 1, "b": 2} + right = {"a": 99} + assert _compare(left, right, "") == {"a", "b-missing"} + + def test_mixed_unexpected_and_diff(self): + left = {"a": 1} + right = {"a": 99, "b": 2} + assert _compare(left, right, "") == {"a", "b-unexpected"} + + +class TestDiffEmpty: + """Two identical or empty inputs.""" + + def test_empty(self): + assert diff(OrderedDict(), OrderedDict()) == [] + + def test_identical(self): + a = OrderedDict(h={"x": 1, "y": 2}) + b = OrderedDict(h={"x": 1, "y": 2}) + assert diff(a, b) == [] + + +class TestDiffBasic: + """Basic diff scenarios.""" + + def test_single_scalar_diff(self): + a = OrderedDict(h={"x": 1}) + b = OrderedDict(h={"x": 2}) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].header == "h" + assert ms[0].categories == {"x"} + + def test_nested_diff(self): + a = OrderedDict(h={"outer": {"inner": 1}}) + b = OrderedDict(h={"outer": {"inner": 2}}) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {"outer.inner"} + + def test_missing_field(self): + a = 
OrderedDict(h={"a": 1, "b": 2}) + b = OrderedDict(h={"a": 1}) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {"b-missing"} + + def test_unexpected_field(self): + a = OrderedDict(h={"a": 1}) + b = OrderedDict(h={"a": 1, "b": 2}) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {"b-unexpected"} + + def test_list_length_mismatch(self): + a = OrderedDict(h={"items": [{"n": 1}]}) + b = OrderedDict(h={"items": [{"n": 1}, {"n": 2}]}) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {"items"} + + def test_list_element_diff(self): + a = OrderedDict(h={"items": [{"n": 1}, {"n": 2}]}) + b = OrderedDict(h={"items": [{"n": 1}, {"n": 99}]}) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {"items.1.n"} + + +class TestDiffHeadersOnlyInOne: + """Headers that exist in only one dict are skipped.""" + + def test_extra_in_a(self): + a = OrderedDict(h1={"x": 1}, h2={"x": 2}) + b = OrderedDict(h1={"x": 1}) + assert diff(a, b) == [] + + def test_extra_in_b(self): + a = OrderedDict(h1={"x": 1}) + b = OrderedDict(h1={"x": 1}, h2={"x": 2}) + assert diff(a, b) == [] + + +class TestDiffPreservesOrder: + """Mismatches are returned in iteration order of first dict.""" + + def test_order(self): + a = OrderedDict(bb={"x": 1}, aa={"x": 2}, cc={"x": 3}) + b = OrderedDict(bb={"x": 10}, aa={"x": 20}, cc={"x": 30}) + ms = diff(a, b) + assert [m.header for m in ms] == ["bb", "aa", "cc"] + + +class TestDiffLeftRight: + """Mismatch exposes left/right blocks.""" + + def test_left_right(self): + left_block = {"x": 1} + right_block = {"x": 2} + a = OrderedDict(h=left_block) + b = OrderedDict(h=right_block) + ms = diff(a, b) + assert ms[0].left is left_block + assert ms[0].right is right_block + + +class TestDiffMultipleCategories: + """Mismatches can have several categories.""" + + def test_two_fields(self): + a = OrderedDict(h={"a": 1, "b": 2}) + b = OrderedDict(h={"a": 10, "b": 20}) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {"a", "b"} + + def test_diff_and_missing(self): + a = OrderedDict(h={"a": 1, "b": 2}) + b = OrderedDict(h={"a": 10}) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {"a", "b-missing"} + + def test_nested_multiple(self): + a = OrderedDict(h={"outer": {"x": 1, "y": 2}, "z": 3}) + b = OrderedDict(h={"outer": {"x": 10, "y": 20}, "z": 30}) + ms = diff(a, b) + assert ms[0].categories == {"outer.x", "outer.y", "z"} + + +class TestDiffFiles: + """Integration test against actual data files.""" + + def test_self_vs_self(self): + ms = diff_files(TestParse.expected(), TestParse.expected()) + assert len(ms) == 0 + + def test_expected_vs_actual(self): + ms = diff_files(TestParse.expected(), TestParse.actual()) + assert len(ms) > 0 + for m in ms: + assert len(m.categories) >= 1 + assert m.header != "" diff --git a/conformance/parseout/parseout/parser.py b/conformance/parseout/parseout/parser.py new file mode 100644 index 0000000000..c078d8655b --- /dev/null +++ b/conformance/parseout/parseout/parser.py @@ -0,0 +1,277 @@ +"""Generic parser for the protobuf-text-like serialization format. + +Converts raw text into ``OrderedDict[str, dict]`` where each key is a +record header and each value is a nested dict of fields. Knows nothing +about domain-specific types. 
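+
+An input record looks like (illustrative, after the README's input format)::
+
+    some_test_id:
+    executed: true
+    fee_details {
+      transaction_fee: 15000
+    }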
+ +Value types produced: + - ``bool`` for ``true`` / ``false`` + - ``int`` for bare integer literals + - ``str`` for quoted strings (quotes stripped) + - ``dict`` for nested ``{ }`` blocks + - ``list`` when the same key appears more than once in a block +""" + +from __future__ import annotations + +from collections import OrderedDict +import os +from pathlib import Path +from typing import Union + +# 20 hyphens exactly, the record separator +_SEPARATOR = "-" * 20 + +#: The scalar types the parser produces. +Value = Union[bool, int, str] + +#: A parsed block: keys map to scalars, nested dicts, or lists thereof. +Block = dict[str, "Value | Block | list[Value | Block]"] + + +def parse(text: str) -> OrderedDict[str, Block]: + """Parse *text* into an ordered mapping of header -> fields dict. + + Args: + text: Full file content. + + Returns: + OrderedDict keyed by record header (str), values are nested dicts. + """ + result: OrderedDict[str, Block] = OrderedDict() + lines = text.splitlines() + chunk: list[str] = [] + + for line in lines: + if line.strip() == _SEPARATOR: + if chunk: + header, block = _parse_chunk(chunk) + if header is not None: + result[header] = block + chunk = [] + else: + chunk.append(line) + + if chunk: + header, block = _parse_chunk(chunk) + if header is not None: + result[header] = block + + return result + + +def parse_file(path: str | Path) -> OrderedDict[str, Block]: + """Parse a file into an ordered mapping of header -> fields dict. + + Args: + path: Path to the text file. + + Returns: + OrderedDict keyed by record header (str), values are nested dicts. + """ + return parse(Path(path).read_text()) + + +def _parse_chunk(lines: list[str]) -> tuple[str | None, Block]: + """Parse a single record chunk into (header, fields dict).""" + while lines and not lines[0].strip(): + lines = lines[1:] + while lines and not lines[-1].strip(): + lines = lines[:-1] + + if not lines: + return None, {} + + header = lines[0].strip() + if header.endswith(":"): + header = header[:-1] + + block = _parse_block_lines(lines[1:]) + return header, block + + +def _parse_block_lines(lines: list[str]) -> Block: + """Parse lines within a block (or at record top level) into a dict. + + If the same key appears more than once, the values are collected into + a list. 
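+
+    For example, two ``items { }`` blocks in one record parse to
+    ``{"items": [{...}, {...}]}``.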
+ """ + result: Block = {} + i = 0 + while i < len(lines): + stripped = lines[i].strip() + if not stripped: + i += 1 + continue + + # Block opening: "field_name {" + if stripped.endswith("{"): + name = stripped[:-1].rstrip() + if name and name.replace("_", "").isalnum(): + inner_lines, end_idx = _collect_brace_block(lines, i + 1) + value = _parse_block_lines(inner_lines) + _insert(result, name, value) + i = end_idx + 1 + continue + + # Key: value pair + kv = _split_kv(stripped) + if kv is not None: + key, raw = kv + _insert(result, key, _parse_scalar(raw)) + i += 1 + continue + + i += 1 + + return result + + +def _collect_brace_block(lines: list[str], start: int) -> tuple[list[str], int]: + """Collect lines inside matching braces, handling nesting.""" + depth = 1 + collected: list[str] = [] + i = start + while i < len(lines): + stripped = lines[i].strip() + if stripped == "}": + depth -= 1 + if depth == 0: + return collected, i + elif stripped.endswith("{"): + depth += 1 + collected.append(lines[i]) + else: + collected.append(lines[i]) + i += 1 + return collected, len(lines) - 1 + + +def _split_kv(line: str) -> tuple[str, str] | None: + """Split ``key: value`` into (key, raw_value), or None.""" + colon = line.find(":") + if colon < 1: + return None + key = line[:colon] + if not key.replace("_", "").isalnum(): + return None + rest = line[colon + 1:] + if not rest or rest[0] != " ": + return None + value = rest.lstrip(" ") + if not value: + return None + return key, value + + +def _parse_scalar(raw: str) -> Value: + """Parse a raw scalar string into bool, int, or str.""" + if raw == "true": + return True + if raw == "false": + return False + if raw.startswith('"') and raw.endswith('"'): + return raw[1:-1] + try: + return int(raw) + except ValueError: + return raw + + +def _insert(block: Block, key: str, value: Value | Block) -> None: + """Insert a value into a block dict, promoting to list on duplicates.""" + if key not in block: + block[key] = value + else: + existing = block[key] + if isinstance(existing, list): + existing.append(value) + else: + block[key] = [existing, value] + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +from pathlib import Path + + +class TestParse: + """Tests for the generic text -> OrderedDict parser.""" + + @staticmethod + def expected(): + print("Running this test requires some conformance outputs to already be generated.") + return os.path.join( + "../env/test-outputs/txn/fixtures/expected", + list(Path("../env/test-outputs/txn/fixtures/expected").glob("*.txt"))[0].name, + ) + + @staticmethod + def actual(): + print("Running this test requires some conformance outputs to already be generated.") + return os.path.join( + "../env/test-outputs/txn/fixtures/actual", + list(Path("../env/test-outputs/txn/fixtures/actual").glob("*.txt"))[0].name, + ) + + def test_single_record(self): + raw = parse("my_header:\nstatus: 9\n") + assert list(raw.keys()) == ["my_header"] + assert raw["my_header"] == {"status": 9} + + def test_preserves_order(self): + raw = parse("bbb:\nx: 1\n\n--------------------\naaa:\nx: 2\n") + assert list(raw.keys()) == ["bbb", "aaa"] + + def test_bool_values(self): + raw = parse("h:\nflag_a: true\nflag_b: false\n") + assert raw["h"]["flag_a"] is True + assert raw["h"]["flag_b"] is False + + def test_int_values(self): + raw = parse("h:\ncount: 42\nbig: 1152604553296817704\n") + assert raw["h"]["count"] == 42 + assert 
+        assert raw["h"]["big"] == 1152604553296817704
+
+    def test_quoted_string_values(self):
+        raw = parse('h:\naddr: "abc123"\n')
+        assert raw["h"]["addr"] == "abc123"
+
+    def test_nested_block(self):
+        raw = parse("h:\ninner {\n x: 1\n}\n")
+        assert raw["h"]["inner"] == {"x": 1}
+
+    def test_repeated_blocks_become_list(self):
+        raw = parse("h:\nitems {\n n: 1\n}\nitems {\n n: 2\n}\n")
+        assert isinstance(raw["h"]["items"], list)
+        assert len(raw["h"]["items"]) == 2
+        assert raw["h"]["items"][0] == {"n": 1}
+        assert raw["h"]["items"][1] == {"n": 2}
+
+    def test_single_block_is_not_list(self):
+        raw = parse("h:\nitems {\n n: 1\n}\n")
+        assert isinstance(raw["h"]["items"], dict)
+
+    def test_empty_text(self):
+        assert parse("") == {}
+        assert parse(" \n\n ") == {}
+
+    def test_multiple_records_with_separator(self):
+        raw = parse("a:\nx: 1\n\n--------------------\nb:\ny: 2\n\n--------------------\n")
+        assert len(raw) == 2
+        assert raw["a"] == {"x": 1}
+        assert raw["b"] == {"y": 2}
+
+    def test_header_without_colon(self):
+        raw = parse("bare_header\nval: 1\n")
+        assert "bare_header" in raw
+
+    def test_full_file_parseme1(self):
+        raw = parse(open(TestParse.expected()).read())
+        assert len(raw) == 5244
+
+    def test_full_file_parseme2(self):
+        raw = parse(open(TestParse.actual()).read())
+        assert len(raw) == 5244
diff --git a/conformance/parseout/parseout/transaction/__init__.py b/conformance/parseout/parseout/transaction/__init__.py
new file mode 100644
index 0000000000..91e96f85d9
--- /dev/null
+++ b/conformance/parseout/parseout/transaction/__init__.py
@@ -0,0 +1,51 @@
+"""parseout.transaction - Transaction-specific parsing and diffing.
+
+Re-exports all public names from the parser and differ submodules so
+callers can use ``parseout.transaction.parse_file(...)`` etc.
+"""
+
+from .parser import (
+    parse,
+    parse_file,
+    Record,
+    RecordResult,
+    SanitizationError,
+    ExecutedSuccess,
+    ExecutedError,
+    FeeDetails,
+    AccountEntry,
+)
+from .differ import (
+    Category,
+    Mismatch,
+    diff,
+    diff_files,
+)
+from .fixture import (
+    programs_for_fixture,
+    programs_for_fixtures,
+    label_program,
+    label_programs,
+    KNOWN_PROGRAMS,
+)
+
+__all__ = [
+    "parse",
+    "parse_file",
+    "Record",
+    "RecordResult",
+    "SanitizationError",
+    "ExecutedSuccess",
+    "ExecutedError",
+    "FeeDetails",
+    "AccountEntry",
+    "Category",
+    "Mismatch",
+    "diff",
+    "diff_files",
+    "programs_for_fixture",
+    "programs_for_fixtures",
+    "label_program",
+    "label_programs",
+    "KNOWN_PROGRAMS",
+]
diff --git a/conformance/parseout/parseout/transaction/differ.py b/conformance/parseout/parseout/transaction/differ.py
new file mode 100644
index 0000000000..7176a47104
--- /dev/null
+++ b/conformance/parseout/parseout/transaction/differ.py
@@ -0,0 +1,746 @@
+"""Transaction differ: compare two parsed transaction files and categorise mismatches.
+
+Built on top of ``parseout.transaction.parser``. Compares two
+``OrderedDict[str, Record]`` collections by their shared test IDs
+and tags every mismatch with one or more ``Category`` values.
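+
+Every category other than ``result_type`` shares its name with the
+corresponding field on the result dataclasses (see ``_categorize``).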
+ +Public API: + ``diff(a, b)`` — two OrderedDicts -> list[Mismatch] + ``diff_files(path_a, path_b)`` — two file paths -> list[Mismatch] +""" + +from __future__ import annotations + +from collections import OrderedDict +from dataclasses import dataclass, field +from enum import Enum, auto +from pathlib import Path +from typing import Union + +from .parser import ( + AccountEntry, + ExecutedError, + ExecutedSuccess, + FeeDetails, + Record, + SanitizationError, + parse_file as _parse_file, +) + + +class Category(Enum): + """Mismatch categories.""" + + result_type = auto() + status = auto() + instruction_error = auto() + instruction_error_index = auto() + custom_error = auto() + fee_details = auto() + executed_units = auto() + loaded_accounts_data_size = auto() + return_data = auto() + modified_accounts = auto() + rollback_accounts = auto() + + +@dataclass +class Mismatch: + """A single mismatched record between two parsed files. + + Attributes: + test_id: The shared record test_id. + categories: Set of ``Category`` values describing what differs. + programs: Sorted list of base58 program addresses invoked by the + transaction. Populated when fixture data is available. + left: The Record from the first file. + right: The Record from the second file. + """ + + test_id: str + categories: set[Category] = field(default_factory=set) + programs: list[str] = field(default_factory=list) + left: Record = field(default_factory=Record) + right: Record = field(default_factory=Record) + + +def diff( + a: OrderedDict[str, Record], + b: OrderedDict[str, Record], + programs: dict[str, list[str]] | None = None, +) -> list[Mismatch]: + """Compare two parsed transaction OrderedDicts and return categorised mismatches. + + Only records whose test_ids appear in *both* dicts are compared. + Records that match exactly are omitted from the result. + + Args: + a: First parsed result (``transaction_parser.parse`` output). + b: Second parsed result. + programs: Optional mapping from test_id to list of invoked program + addresses (base58). When provided, each ``Mismatch`` is + annotated with the programs for its test_id. + + Returns: + List of ``Mismatch`` objects, one per differing test_id, in the + iteration order of *a*. + """ + result: list[Mismatch] = [] + for test_id in a: + if test_id not in b: + continue + rec_a, rec_b = a[test_id], b[test_id] + if rec_a.result == rec_b.result: + continue + cats = _categorize(rec_a.result, rec_b.result) + progs = programs.get(test_id, []) if programs else [] + result.append(Mismatch(test_id=test_id, categories=cats, programs=progs, left=rec_a, right=rec_b)) + return result + + +def diff_files( + path_a: str | Path, + path_b: str | Path, + fixture_dir: str | Path | None = None, +) -> list[Mismatch]: + """Parse two files and return categorised mismatches. + + Convenience wrapper around ``diff(parse_file(a), parse_file(b))``. + + Args: + path_a: Path to the expected output file. + path_b: Path to the actual output file. + fixture_dir: Optional path to directory containing ``.fix`` protobuf + fixtures. When provided, each mismatch is annotated with the + invoked program addresses extracted from the fixture inputs. 
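+
+    Example (paths are illustrative)::
+
+        ms = diff_files("expected.txt", "actual.txt",
+                        fixture_dir="env/test-vectors/txn/fixtures")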
+ """ + programs: dict[str, list[str]] | None = None + if fixture_dir is not None: + from .fixture import programs_for_fixtures + + programs = programs_for_fixtures(fixture_dir) + return diff(_parse_file(path_a), _parse_file(path_b), programs=programs) + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + +#: The result union for type hints. +_Result = Union[SanitizationError, ExecutedSuccess, ExecutedError] + + +def _categorize(left: _Result, right: _Result) -> set[Category]: + """Determine which categories of difference exist between two results.""" + if type(left) is not type(right): + return {Category.result_type} + + # Same type — compare field by field. Every Category except result_type + # shares its name with the corresponding dataclass field. + cats: set[Category] = set() + for cat in Category: + if cat is Category.result_type: + continue + attr = cat.name + if hasattr(left, attr) and getattr(left, attr) != getattr(right, attr): + cats.add(cat) + return cats + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +import pytest +from ..parser import TestParse + +class TestCategory: + """Ensure the Category enum has all expected members.""" + + def test_all_categories_exist(self): + names = {c.name for c in Category} + assert names == { + "result_type", + "status", + "instruction_error", + "instruction_error_index", + "custom_error", + "fee_details", + "executed_units", + "loaded_accounts_data_size", + "return_data", + "modified_accounts", + "rollback_accounts", + } + + +class TestDiffIdentical: + """Two identical inputs should produce no mismatches.""" + + def test_empty(self): + assert diff(OrderedDict(), OrderedDict()) == [] + + def test_identical_sanitization_error(self): + rec = Record(test_id="h", result=SanitizationError(status=9)) + a = OrderedDict(h=rec) + b = OrderedDict(h=rec) + assert diff(a, b) == [] + + def test_identical_executed_success(self): + rec = Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + a = OrderedDict(h=rec) + b = OrderedDict(h=rec) + assert diff(a, b) == [] + + +class TestDiffResultType: + """Cross-variant mismatches should be tagged result_type only.""" + + def test_sanitization_vs_executed_success(self): + a = OrderedDict(h=Record(test_id="h", result=SanitizationError(status=9))) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.result_type} + + def test_executed_success_vs_executed_error(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + fee_details=FeeDetails(transaction_fee=5000), + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.result_type} + + def test_sanitization_vs_executed_error(self): + a = OrderedDict(h=Record(test_id="h", result=SanitizationError(status=9))) + b = OrderedDict( + h=Record( + 
test_id="h", + result=ExecutedError(status=4, fee_details=FeeDetails(transaction_fee=5000)), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.result_type} + + +class TestDiffStatus: + """Same variant, different status code.""" + + def test_sanitization_error_status(self): + a = OrderedDict(h=Record(test_id="h", result=SanitizationError(status=9))) + b = OrderedDict(h=Record(test_id="h", result=SanitizationError(status=27))) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.status} + + def test_executed_error_status(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError(status=4, fee_details=FeeDetails(transaction_fee=5000)), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError(status=9, fee_details=FeeDetails(transaction_fee=5000)), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.status} + + +class TestDiffInstructionError: + """Same variant, different instruction_error fields.""" + + def test_instruction_error_only(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + instruction_error=3, + fee_details=FeeDetails(transaction_fee=5000), + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + instruction_error=8, + fee_details=FeeDetails(transaction_fee=5000), + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.instruction_error} + + def test_instruction_error_index(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + instruction_error=8, + instruction_error_index=0, + fee_details=FeeDetails(transaction_fee=5000), + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + instruction_error=8, + instruction_error_index=2, + fee_details=FeeDetails(transaction_fee=5000), + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.instruction_error_index} + + def test_custom_error(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + instruction_error=8, + custom_error=7, + fee_details=FeeDetails(transaction_fee=5000), + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + instruction_error=8, + custom_error=42, + fee_details=FeeDetails(transaction_fee=5000), + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.custom_error} + + +class TestDiffFeeDetails: + """Same variant, different fee_details.""" + + def test_transaction_fee(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=10000), + modified_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.fee_details} + + def test_prioritization_fee(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000, prioritization_fee=100), + modified_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000, prioritization_fee=200), + 
modified_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.fee_details} + + +class TestDiffExecutionFields: + """Same variant, different execution-related scalar fields.""" + + def test_executed_units(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + executed_units=1000, + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + executed_units=2000, + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.executed_units} + + def test_loaded_accounts_data_size(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + loaded_accounts_data_size=100, + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + loaded_accounts_data_size=200, + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.loaded_accounts_data_size} + + def test_return_data(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + return_data="aa", + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + return_data="bb", + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.return_data} + + +class TestDiffAccounts: + """Same variant, different account lists.""" + + def test_modified_accounts_count(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[ + AccountEntry(address="A", owner="O"), + AccountEntry(address="B", owner="O"), + ], + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.modified_accounts} + + def test_modified_accounts_lamports(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", lamports=100, owner="O")], + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", lamports=200, owner="O")], + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.modified_accounts} + + def test_modified_accounts_data(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", data="aa", owner="O")], + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedSuccess( + 
fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", data="bb", owner="O")], + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.modified_accounts} + + def test_rollback_accounts(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + fee_details=FeeDetails(transaction_fee=5000), + rollback_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + fee_details=FeeDetails(transaction_fee=5000), + rollback_accounts=[], + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.rollback_accounts} + + +class TestDiffMultipleCategories: + """Mismatches can be tagged with several categories at once.""" + + def test_status_and_instruction_error(self): + a = OrderedDict( + h=Record( + test_id="h", + result=SanitizationError(status=9, instruction_error=3), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=SanitizationError(status=27, instruction_error=8), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == {Category.status, Category.instruction_error} + + def test_three_categories(self): + a = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + instruction_error=3, + fee_details=FeeDetails(transaction_fee=5000), + modified_accounts=[AccountEntry(address="A", owner="O")], + ), + ) + ) + b = OrderedDict( + h=Record( + test_id="h", + result=ExecutedError( + status=9, + instruction_error=8, + fee_details=FeeDetails(transaction_fee=10000), + modified_accounts=[AccountEntry(address="B", owner="O")], + ), + ) + ) + ms = diff(a, b) + assert len(ms) == 1 + assert ms[0].categories == { + Category.instruction_error, + Category.fee_details, + Category.modified_accounts, + } + + +class TestDiffTestIdsOnlyInOne: + """Test_ids that exist in only one dict are skipped (not errors).""" + + def test_extra_in_a(self): + a = OrderedDict( + h1=Record(test_id="h1", result=SanitizationError(status=9)), + h2=Record(test_id="h2", result=SanitizationError(status=4)), + ) + b = OrderedDict( + h1=Record(test_id="h1", result=SanitizationError(status=9)), + ) + ms = diff(a, b) + assert len(ms) == 0 + + def test_extra_in_b(self): + a = OrderedDict( + h1=Record(test_id="h1", result=SanitizationError(status=9)), + ) + b = OrderedDict( + h1=Record(test_id="h1", result=SanitizationError(status=9)), + h2=Record(test_id="h2", result=SanitizationError(status=4)), + ) + ms = diff(a, b) + assert len(ms) == 0 + + +class TestDiffPreservesOrder: + """Mismatches are returned in the iteration order of the first dict.""" + + def test_order(self): + a = OrderedDict( + bb=Record(test_id="bb", result=SanitizationError(status=1)), + aa=Record(test_id="aa", result=SanitizationError(status=2)), + cc=Record(test_id="cc", result=SanitizationError(status=3)), + ) + b = OrderedDict( + bb=Record(test_id="bb", result=SanitizationError(status=10)), + aa=Record(test_id="aa", result=SanitizationError(status=20)), + cc=Record(test_id="cc", result=SanitizationError(status=30)), + ) + ms = diff(a, b) + assert [m.test_id for m in ms] == ["bb", "aa", "cc"] + + +class TestDiffMismatchFields: + """The Mismatch object exposes left/right records.""" + + def test_left_right(self): + rec_a = Record(test_id="h", result=SanitizationError(status=9)) + rec_b = Record(test_id="h", result=SanitizationError(status=27)) + a = OrderedDict(h=rec_a) + b = OrderedDict(h=rec_b) + ms = 
diff(a, b)
+        assert ms[0].left is rec_a
+        assert ms[0].right is rec_b
+
+
+class TestDiffFiles:
+    """Integration test against the actual data files."""
+
+    def test_parseme1_vs_parseme2(self):
+        ms = diff_files(
+            TestParse.expected(),
+            TestParse.actual(),
+        )
+        assert len(ms) == 1818
+        # Every mismatch has at least one category
+        for m in ms:
+            assert len(m.categories) >= 1
+            assert m.test_id != ""
+            assert m.left.result is not None
+            assert m.right.result is not None
+
+    def test_parseme1_vs_parseme2_category_counts(self):
+        ms = diff_files(
+            TestParse.expected(),
+            TestParse.actual(),
+        )
+        from collections import Counter
+
+        counts = Counter()
+        for m in ms:
+            for c in m.categories:
+                counts[c] += 1
+
+        # Verified counts from manual analysis
+        assert counts[Category.result_type] == 1194
+        assert counts[Category.status] == 233
+        assert counts[Category.instruction_error] == 312
+        assert counts[Category.modified_accounts] > 0
+        assert counts[Category.rollback_accounts] > 0
+
+    def test_parseme1_vs_self(self):
+        ms = diff_files(
+            TestParse.expected(),
+            TestParse.expected(),
+        )
+        assert len(ms) == 0
diff --git a/conformance/parseout/parseout/transaction/fixture.py b/conformance/parseout/parseout/transaction/fixture.py
new file mode 100644
index 0000000000..16c07a1cde
--- /dev/null
+++ b/conformance/parseout/parseout/transaction/fixture.py
@@ -0,0 +1,334 @@
+"""Read invoked program IDs from binary protobuf ``.fix`` fixture files.
+
+Implements a minimal protobuf wire-format decoder — just enough to
+navigate from ``TxnFixture`` down to the ``CompiledInstruction``
+program IDs without requiring any generated code or the
+``google.protobuf`` package.
+
+Wire-format field numbers (from ``v1.pb.zig``):
+
+    TxnFixture
+        1: metadata (FixtureMetadata)
+        2: input (TxnContext)
+
+    TxnContext
+        1: tx (SanitizedTransaction)
+
+    SanitizedTransaction
+        1: message (TransactionMessage)
+
+    TransactionMessage
+        3: account_keys (repeated bytes)
+        6: instructions (repeated CompiledInstruction)
+
+    CompiledInstruction
+        1: program_id_index (uint32)
+
+Public API:
+    ``programs_for_fixture(path)`` — single file -> sorted list[str]
+    ``programs_for_fixtures(fixture_dir)`` — directory -> dict[test_id, list[str]]
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+
+# ---------------------------------------------------------------------------
+# Minimal protobuf wire-format helpers
+# ---------------------------------------------------------------------------
+
+def _decode_varint(data: bytes, pos: int) -> tuple[int, int]:
+    """Decode a base-128 varint, returning (value, new_pos)."""
+    result = 0
+    shift = 0
+    while True:
+        b = data[pos]
+        result |= (b & 0x7F) << shift
+        pos += 1
+        if (b & 0x80) == 0:
+            return result, pos
+        shift += 7
+
+
+def _iter_fields(data: bytes) -> list[tuple[int, int, bytes | int]]:
+    """Decode *data* into a list of (field_number, wire_type, payload) tuples.
+
+    For wire type 0 (varint), payload is an ``int``.
+    For wire type 2 (length-delimited), payload is ``bytes``.
+    Other wire types are skipped.
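+
+    Example: ``bytes([0x08, 0x96, 0x01])`` decodes to ``[(1, 0, 150)]``,
+    since the tag byte ``0x08`` is ``(1 << 3) | 0`` (field 1, wire type 0)
+    and ``0x96 0x01`` is the varint encoding of 150 (the same bytes are
+    checked in ``TestIterFields`` below).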
+ """ + fields: list[tuple[int, int, bytes | int]] = [] + pos = 0 + end = len(data) + while pos < end: + tag, pos = _decode_varint(data, pos) + field_num = tag >> 3 + wire_type = tag & 0x07 + if wire_type == 0: # varint + val, pos = _decode_varint(data, pos) + fields.append((field_num, wire_type, val)) + elif wire_type == 2: # length-delimited + length, pos = _decode_varint(data, pos) + fields.append((field_num, wire_type, data[pos : pos + length])) + pos += length + elif wire_type == 5: # 32-bit fixed + pos += 4 + elif wire_type == 1: # 64-bit fixed + pos += 8 + else: + # Unknown wire type — we can't safely skip, so stop. + break + return fields + + +def _get_submessage(data: bytes, field_num: int) -> bytes | None: + """Return the raw bytes of the first length-delimited field with *field_num*.""" + for fnum, wtype, payload in _iter_fields(data): + if fnum == field_num and wtype == 2: + assert isinstance(payload, bytes) + return payload + return None + + +def _get_repeated_bytes(data: bytes, field_num: int) -> list[bytes]: + """Return all length-delimited payloads for *field_num*.""" + result: list[bytes] = [] + for fnum, wtype, payload in _iter_fields(data): + if fnum == field_num and wtype == 2: + assert isinstance(payload, bytes) + result.append(payload) + return result + + +def _get_varint(data: bytes, field_num: int, default: int = 0) -> int: + """Return the first varint value for *field_num*.""" + for fnum, wtype, payload in _iter_fields(data): + if fnum == field_num and wtype == 0: + assert isinstance(payload, int) + return payload + return default + + +# --------------------------------------------------------------------------- +# Base58 encoding (Solana public key representation) +# --------------------------------------------------------------------------- + +_B58_ALPHABET = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" + + +def _base58_encode(data: bytes) -> str: + """Encode *data* as a base58 string (Bitcoin/Solana alphabet).""" + # Count leading zeros + n_leading = 0 + for b in data: + if b == 0: + n_leading += 1 + else: + break + + # Convert to big integer + num = int.from_bytes(data, "big") + result = bytearray() + while num > 0: + num, rem = divmod(num, 58) + result.append(_B58_ALPHABET[rem]) + result.reverse() + + return ("1" * n_leading) + result.decode("ascii") + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +def programs_for_fixture(path: str | Path) -> list[str]: + """Extract the sorted, deduplicated list of invoked program addresses. + + Navigates: TxnFixture.input.tx.message → account_keys / instructions + to resolve each instruction's ``program_id_index`` into a base58 address. + + Args: + path: Path to a ``.fix`` protobuf file. + + Returns: + Sorted list of unique base58-encoded program addresses invoked by + the transaction. Empty list if the fixture cannot be decoded. 
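+
+    For example, a fixture whose only instruction has ``program_id_index``
+    0, and whose first account key is the 32-byte all-zeros system program
+    key, decodes to ``["11111111111111111111111111111111"]``.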
+ """ + data = Path(path).read_bytes() + + # TxnFixture -> field 2 (input: TxnContext) + txn_context = _get_submessage(data, 2) + if txn_context is None: + return [] + + # TxnContext -> field 1 (tx: SanitizedTransaction) + sanitized_tx = _get_submessage(txn_context, 1) + if sanitized_tx is None: + return [] + + # SanitizedTransaction -> field 1 (message: TransactionMessage) + tx_message = _get_submessage(sanitized_tx, 1) + if tx_message is None: + return [] + + # TransactionMessage -> field 3 (account_keys: repeated bytes) + account_keys = _get_repeated_bytes(tx_message, 3) + + # TransactionMessage -> field 6 (instructions: repeated CompiledInstruction) + instruction_blobs = _get_repeated_bytes(tx_message, 6) + + # Resolve each instruction's program_id_index -> base58 address + programs: set[str] = set() + for ixn_blob in instruction_blobs: + idx = _get_varint(ixn_blob, 1, default=0) + if idx < len(account_keys): + programs.add(_base58_encode(account_keys[idx])) + + return sorted(programs) + + +def programs_for_fixtures(fixture_dir: str | Path) -> dict[str, list[str]]: + """Build a mapping from test_id to invoked program addresses. + + Scans *fixture_dir* for ``*.fix`` files. The test_id is derived from + the filename by stripping the ``.fix`` suffix. + + Args: + fixture_dir: Directory containing ``.fix`` files. + + Returns: + Dict mapping ``test_id`` → sorted list of base58 program addresses. + """ + fixture_dir = Path(fixture_dir) + result: dict[str, list[str]] = {} + for fix_path in sorted(fixture_dir.glob("*.fix")): + test_id = fix_path.stem + result[test_id] = programs_for_fixture(fix_path) + return result + + +# --------------------------------------------------------------------------- +# Well-known Solana program addresses → human-readable labels +# --------------------------------------------------------------------------- + +KNOWN_PROGRAMS: dict[str, str] = { + "11111111111111111111111111111111": "system", + "Config1111111111111111111111111111111111111": "config", + "Stake11111111111111111111111111111111111111": "stake", + "Vote111111111111111111111111111111111111111": "vote", + "ComputeBudget111111111111111111111111111111": "compute-budget", + "BPFLoaderUpgradeab1e11111111111111111111111": "bpf-loader-upgradeable", + "BPFLoader2111111111111111111111111111111111": "bpf-loader", + "BPFLoader1111111111111111111111111111111111": "bpf-loader-deprecated", + "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA": "token", + "TokenzQdBNbLqP5VEhdkAS6EPFLC1PHnBqCXEpPxuEb": "token-2022", + "ATokenGPvbdGVxr1b2hvZbsiqW5xWH25efTNsLJA8knL": "associated-token", + "namesLPneVptA9Z5rqUDD9tMTWEJwofgaYwp8cawRkX": "name-service", + "Memo1UhkJBfCR6MNBsmvMuumyHiSZcLYnE2SJ9gQvn4": "memo-v1", + "MemoSq4gqABAXKb96qnH8TysNcWxMyWCqXgDLGmfcHr": "memo-v3", + "AddressLookupTab1e1111111111111111111111111": "address-lookup-table", + "Ed25519SigVerify111111111111111111111111111": "ed25519", + "KeccakSecp256k11111111111111111111111111111": "secp256k1", + "Secp256r1SigVerify1111111111111111111111111": "secp256r1", + "ZkE1Gama1Proof11111111111111111111111111111": "zk-elgamal-proof", + "Sysvar1111111111111111111111111111111111111": "sysvar", + "NativeLoader1111111111111111111111111111111": "native-loader", + "LoaderV411111111111111111111111111111111111": "loader-v4", +} + + +def label_program(address: str) -> str: + """Return a short human-readable label for *address*, or the address itself.""" + return KNOWN_PROGRAMS.get(address, address) + + +def label_programs(addresses: list[str]) -> list[str]: + """Label a list of 
program addresses."""
+    return [label_program(a) for a in addresses]
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+import pytest
+
+
+class TestBase58:
+    """Base58 encoding sanity checks."""
+
+    def test_empty(self):
+        assert _base58_encode(b"") == ""
+
+    def test_leading_zeros(self):
+        assert _base58_encode(b"\x00") == "1"
+
+    def test_system_program(self):
+        # The system program address is the 32-byte all-zeros key, which
+        # base58-encodes to a run of thirty-two '1' characters.
+        assert _base58_encode(b"\x00" * 32) == "1" * 32
+
+
+class TestDecodeVarint:
+    def test_single_byte(self):
+        assert _decode_varint(b"\x05", 0) == (5, 1)
+
+    def test_multi_byte(self):
+        assert _decode_varint(b"\xac\x02", 0) == (300, 2)
+
+
+class TestIterFields:
+    def test_simple(self):
+        # Field 1, wire type 0, value 150 = tag byte 0x08, varint 0x96 0x01
+        data = bytes([0x08, 0x96, 0x01])
+        fields = _iter_fields(data)
+        assert len(fields) == 1
+        assert fields[0] == (1, 0, 150)
+
+
+class TestFixtureDecoding:
+    """Test against actual fixture files if available."""
+
+    _FIXTURE_DIR = Path(__file__).resolve().parents[3] / "env" / "test-vectors" / "txn" / "fixtures"
+
+    def _first_fixture(self) -> Path:
+        fixtures = sorted(self._FIXTURE_DIR.glob("*.fix"))
+        if not fixtures:
+            pytest.skip("No fixture files available")
+        return fixtures[0]
+
+    def test_programs_for_fixture_returns_nonempty(self):
+        path = self._first_fixture()
+        programs = programs_for_fixture(path)
+        assert len(programs) > 0
+        # All entries should be non-empty strings
+        for p in programs:
+            assert isinstance(p, str)
+            assert len(p) > 0
+
+    def test_programs_are_valid_base58(self):
+        path = self._first_fixture()
+        programs = programs_for_fixture(path)
+        valid_chars = set("123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz")
+        for p in programs:
+            assert all(c in valid_chars for c in p), f"Invalid base58: {p}"
+
+    def test_programs_for_fixtures_batch(self):
+        if not self._FIXTURE_DIR.exists():
+            pytest.skip("No fixture directory")
+        mapping = programs_for_fixtures(self._FIXTURE_DIR)
+        assert len(mapping) > 0
+        # Most fixtures should yield at least one program; allow a few
+        # empty entries for fixtures whose input cannot be decoded.
+        empty_count = sum(1 for v in mapping.values() if len(v) == 0)
+        assert empty_count < len(mapping)
+
+    def test_known_program_labels(self):
+        assert label_program("11111111111111111111111111111111") == "system"
+        assert label_program("Vote111111111111111111111111111111111111111") == "vote"
+        assert label_program("SomeUnknownAddress") == "SomeUnknownAddress"
diff --git a/conformance/parseout/parseout/transaction/parser.py b/conformance/parseout/parseout/transaction/parser.py
new file mode 100644
index 0000000000..f05ed9a61d
--- /dev/null
+++ b/conformance/parseout/parseout/transaction/parser.py
@@ -0,0 +1,645 @@
+"""Transaction-level parser: raw dicts -> typed dataclasses.
+
+Layer 2 of the parser stack. Takes the generic ``Block`` dicts produced
+by ``parseout.parser.parse`` (Layer 1) and converts them into the
+domain-specific transaction dataclass hierarchy.
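+
+For example (taken from the tests below), the record::
+
+    h:
+    sanitization_error: true
+    status: 9
+
+parses to ``Record(test_id="h", result=SanitizationError(status=9))``.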
+ +Public API: + ``parse(text)`` — text -> OrderedDict[str, Record] + ``parse_file(path)`` — file path -> OrderedDict[str, Record] +""" + +from __future__ import annotations + +from collections import OrderedDict +from dataclasses import dataclass, field +from typing import Optional, Union + +from .. import parser +from ..parser import Block + + +def parse(text: str) -> OrderedDict[str, Record]: + """Parse text into an ordered mapping of test_id -> Record. + + Wires Layer 1 (generic parser) and Layer 2 (typed conversion) together. + + Args: + text: Full file content. + + Returns: + OrderedDict keyed by record test_id, values are typed Record objects. + """ + return _records_from_raw(parser.parse(text)) + + +def parse_file(path: str) -> OrderedDict[str, Record]: + """Parse a file into an ordered mapping of test_id -> Record. + + Args: + path: Path to the text file. + + Returns: + OrderedDict keyed by record test_id, values are typed Record objects. + """ + return _records_from_raw(parser.parse_file(path)) + + +# --------------------------------------------------------------------------- +# Types +# --------------------------------------------------------------------------- + + +@dataclass +class FeeDetails: + """Fee breakdown for a transaction. + + Attributes: + transaction_fee: The base transaction fee. + prioritization_fee: Optional priority fee for faster processing. + """ + + transaction_fee: int = 0 + prioritization_fee: Optional[int] = None + + +@dataclass +class AccountEntry: + """An account that was modified or needs rollback. + + Attributes: + address: Base58-encoded Solana account address. + lamports: Account balance in lamports (u64). Absent in ~1.7% of entries. + data: Optional hex-encoded account data. + executable: Whether the account is executable. Absent means False. + owner: Base58-encoded owner program address. + """ + + address: str = "" + lamports: Optional[int] = None + data: Optional[str] = None + executable: bool = False + owner: str = "" + + +@dataclass +class SanitizationError: + """The transaction failed sanitization before execution. + + Invariants (enforced by data, not by constructor): + - ``status`` is always present. + - ``instruction_error`` may be present; when absent, + ``instruction_error_index`` is also absent. + - ``custom_error`` never appears. + - No fee or account data. + """ + + status: int = 0 + instruction_error: Optional[int] = None + instruction_error_index: Optional[int] = None + + +@dataclass +class ExecutedSuccess: + """The transaction was executed and succeeded (``is_ok=True``). + + Invariants: + - ``fee_details`` is always present. + - ``modified_accounts`` is always non-empty. + - No ``status``, ``instruction_error``, ``rollback_accounts``. + """ + + fee_details: FeeDetails = field(default_factory=FeeDetails) + modified_accounts: list[AccountEntry] = field(default_factory=list) + executed_units: Optional[int] = None + loaded_accounts_data_size: Optional[int] = None + return_data: Optional[str] = None + + +@dataclass +class ExecutedError: + """The transaction was executed but failed (``is_ok=False``). + + Invariants: + - ``fee_details`` is always present. + - ``status`` is always present. + - ``instruction_error`` only when ``status == 9``. + - ``instruction_error_index`` and ``custom_error`` only when + ``instruction_error`` is present. 
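+
+    For example (mirroring ``TestParseCustomError`` below), a custom
+    program error surfaces as::
+
+        executed: true
+        status: 9
+        instruction_error: 8
+        instruction_error_index: 2
+        custom_error: 7
+        fee_details {
+          transaction_fee: 5000
+        }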
+ """ + + status: int = 0 + fee_details: FeeDetails = field(default_factory=FeeDetails) + instruction_error: Optional[int] = None + instruction_error_index: Optional[int] = None + custom_error: Optional[int] = None + modified_accounts: list[AccountEntry] = field(default_factory=list) + rollback_accounts: list[AccountEntry] = field(default_factory=list) + executed_units: Optional[int] = None + loaded_accounts_data_size: Optional[int] = None + return_data: Optional[str] = None + + +#: Union of all result variants. +RecordResult = Union[SanitizationError, ExecutedSuccess, ExecutedError] + + +@dataclass +class Record: + """A single parsed record from the configuration file. + + Each record starts with a header line (the record identifier) followed by + a colon. The ``result`` field holds the variant-specific data. Use + ``isinstance(record.result, ...)`` to discriminate: + + >>> if isinstance(record.result, ExecutedSuccess): + ... print(record.result.fee_details) + + Attributes: + test_id: The full header string (without trailing colon). + result: One of ``SanitizationError``, ``ExecutedSuccess``, or + ``ExecutedError``. + """ + + test_id: str = "" + result: Optional[RecordResult] = None + + +# --------------------------------------------------------------------------- +# Conversion: raw Block dicts -> dataclasses +# --------------------------------------------------------------------------- + + +def _records_from_raw(raw: OrderedDict[str, Block]) -> OrderedDict[str, Record]: + """Convert an entire parsed OrderedDict into an OrderedDict of Records.""" + result: OrderedDict[str, Record] = OrderedDict() + for test_id, block in raw.items(): + result[test_id] = _record_from_entry(test_id, block) + return result + + +def _record_from_entry(test_id: str, block: Block) -> Record: + """Convert a single (test_id, block) pair into a Record.""" + record = Record(test_id=test_id) + + if block.get("sanitization_error") is True: + record.result = SanitizationError( + status=block.get("status", 0), + instruction_error=block.get("instruction_error"), + instruction_error_index=block.get("instruction_error_index"), + ) + elif block.get("executed") is True and block.get("is_ok") is True: + record.result = ExecutedSuccess( + fee_details=_fee_details(block.get("fee_details", {})), + modified_accounts=_account_list(block.get("modified_accounts", [])), + executed_units=block.get("executed_units"), + loaded_accounts_data_size=block.get("loaded_accounts_data_size"), + return_data=block.get("return_data"), + ) + else: + record.result = ExecutedError( + status=block.get("status", 0), + fee_details=_fee_details(block.get("fee_details", {})), + instruction_error=block.get("instruction_error"), + instruction_error_index=block.get("instruction_error_index"), + custom_error=block.get("custom_error"), + modified_accounts=_account_list(block.get("modified_accounts", [])), + rollback_accounts=_account_list(block.get("rollback_accounts", [])), + executed_units=block.get("executed_units"), + loaded_accounts_data_size=block.get("loaded_accounts_data_size"), + return_data=block.get("return_data"), + ) + + return record + + +def _fee_details(raw: Block | dict) -> FeeDetails: + """Convert a fee_details dict into a FeeDetails.""" + return FeeDetails( + transaction_fee=raw.get("transaction_fee", 0), + prioritization_fee=raw.get("prioritization_fee"), + ) + + +def _account_list(raw: list | dict) -> list[AccountEntry]: + """Convert account entries — handles single dict or list of dicts.""" + if isinstance(raw, dict): + return 
[_account_entry(raw)] + if isinstance(raw, list): + return [_account_entry(item) for item in raw if isinstance(item, dict)] + return [] + + +def _account_entry(raw: dict) -> AccountEntry: + """Convert an account dict into an AccountEntry.""" + return AccountEntry( + address=raw.get("address", ""), + lamports=raw.get("lamports"), + data=raw.get("data"), + executable=raw.get("executable", False), + owner=raw.get("owner", ""), + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +import pytest +from ..parser import TestParse + + +class TestHeaderVariants: + """Test that all header formats are correctly parsed.""" + + def test_hex_hash_slot_header(self): + text = "00dc63d6f5824efa900302649eeca7983e5d3f5a_1834424:\nstatus: 9\n" + records = parse(text) + assert len(records) == 1 + r = records["00dc63d6f5824efa900302649eeca7983e5d3f5a_1834424"] + assert r.test_id == "00dc63d6f5824efa900302649eeca7983e5d3f5a_1834424" + + def test_crash_header(self): + text = "crash-0ae873089c62df6e15d3fc0a271485da355874c8:\nsanitization_error: true\nstatus: 9\n" + records = parse(text) + assert "crash-0ae873089c62df6e15d3fc0a271485da355874c8" in records + + def test_uuid_header(self): + text = "aa7aa005-ba12-4d08-935d-18ed0bc1a54f:\nexecuted: true\nstatus: 9\n" + records = parse(text) + assert "aa7aa005-ba12-4d08-935d-18ed0bc1a54f" in records + + def test_name_header(self): + text = "is_signer_err_handling:\nexecuted: true\n" + records = parse(text) + assert "is_signer_err_handling" in records + + +class TestParseMinimal: + """Test parsing of minimal records.""" + + def test_sanitization_error_only(self): + text = """header1: +sanitization_error: true +status: 9 +instruction_error: 3 +""" + r = parse(text)["header1"] + assert isinstance(r.result, SanitizationError) + assert r.result.status == 9 + assert r.result.instruction_error == 3 + assert r.result.instruction_error_index is None + + def test_status_only(self): + text = "header1:\nsanitization_error: true\nstatus: 27\n" + r = parse(text)["header1"] + assert isinstance(r.result, SanitizationError) + assert r.result.status == 27 + + def test_empty_text(self): + assert parse("") == OrderedDict() + assert parse(" \n\n ") == OrderedDict() + + +class TestParseWithFeeDetails: + """Test parsing of records with fee_details blocks.""" + + def test_fee_details_basic(self): + text = """header1: +executed: true +status: 9 +instruction_error: 3 +fee_details { + transaction_fee: 15000 +} +loaded_accounts_data_size: 26626 +""" + r = parse(text)["header1"] + assert isinstance(r.result, ExecutedError) + assert r.result.fee_details.transaction_fee == 15000 + assert r.result.fee_details.prioritization_fee is None + assert r.result.loaded_accounts_data_size == 26626 + + def test_fee_details_with_prioritization(self): + text = """header1: +executed: true +is_ok: true +executed_units: 27829 +fee_details { + transaction_fee: 15000 + prioritization_fee: 1152604553296817704 +} +loaded_accounts_data_size: 20198 +""" + r = parse(text)["header1"] + assert isinstance(r.result, ExecutedSuccess) + assert r.result.executed_units == 27829 + assert r.result.fee_details.transaction_fee == 15000 + assert r.result.fee_details.prioritization_fee == 1152604553296817704 + + +class TestParseModifiedAccounts: + """Test parsing of records with modified_accounts blocks.""" + + def test_single_account(self): + text = """header1: +executed: true +is_ok: true +fee_details { + 
transaction_fee: 10000 +} +modified_accounts { + address: "8fi2Typkf4m1z9miGfZQGRXDimBTVQqWHciMA9aZGXpN" + lamports: 10733753813112760225 + owner: "11111111111111111111111111111111" +} +""" + r = parse(text)["header1"] + assert isinstance(r.result, ExecutedSuccess) + acct = r.result.modified_accounts[0] + assert acct.address == "8fi2Typkf4m1z9miGfZQGRXDimBTVQqWHciMA9aZGXpN" + assert acct.lamports == 10733753813112760225 + assert acct.owner == "11111111111111111111111111111111" + assert acct.executable is False + assert acct.data is None + + def test_multiple_accounts(self): + text = """header1: +executed: true +is_ok: true +fee_details { + transaction_fee: 5000 +} +modified_accounts { + address: "Addr1" + executable: true + owner: "Owner1" +} +modified_accounts { + address: "Addr2" + owner: "Owner2" +} +""" + r = parse(text)["header1"] + assert isinstance(r.result, ExecutedSuccess) + assert len(r.result.modified_accounts) == 2 + assert r.result.modified_accounts[0].executable is True + assert r.result.modified_accounts[1].executable is False + + def test_account_with_data(self): + text = """header1: +executed: true +is_ok: true +fee_details { + transaction_fee: 5000 +} +modified_accounts { + address: "Addr1" + data: "...137 zeros..." + owner: "Owner1" +} +""" + r = parse(text)["header1"] + assert isinstance(r.result, ExecutedSuccess) + assert r.result.modified_accounts[0].data == "...137 zeros..." + + def test_account_without_lamports(self): + text = """header1: +executed: true +is_ok: true +fee_details { + transaction_fee: 5000 +} +modified_accounts { + address: "Addr1" + owner: "Owner1" +} +""" + r = parse(text)["header1"] + assert isinstance(r.result, ExecutedSuccess) + assert r.result.modified_accounts[0].lamports is None + + +class TestParseRollbackAccounts: + """Test parsing of rollback_accounts blocks.""" + + def test_rollback_accounts(self): + text = """header1: +executed: true +status: 4 +fee_details { + transaction_fee: 10000 +} +rollback_accounts { + address: "Addr1" + lamports: 4295215558015524241 + executable: true + owner: "Owner1" +} +""" + r = parse(text)["header1"] + assert isinstance(r.result, ExecutedError) + assert len(r.result.rollback_accounts) == 1 + assert r.result.rollback_accounts[0].executable is True + + +class TestParseReturnData: + """Test parsing of return_data fields.""" + + def test_return_data(self): + text = """header1: +executed: true +is_ok: true +fee_details { + transaction_fee: 5000 +} +modified_accounts { + address: "Addr1" + owner: "Owner1" +} +return_data: "a500000000000000" +""" + r = parse(text)["header1"] + assert isinstance(r.result, ExecutedSuccess) + assert r.result.return_data == "a500000000000000" + + +class TestParseCustomError: + """Test parsing of custom_error field.""" + + def test_custom_error(self): + text = """header1: +executed: true +status: 9 +instruction_error: 8 +instruction_error_index: 2 +custom_error: 7 +fee_details { + transaction_fee: 5000 +} +""" + r = parse(text)["header1"] + assert isinstance(r.result, ExecutedError) + assert r.result.custom_error == 7 + assert r.result.instruction_error_index == 2 + + +class TestMultipleRecords: + """Test parsing of multiple records separated by dashes.""" + + def test_two_records(self): + text = """header_a: +sanitization_error: true +status: 9 + +-------------------- +header_b: +executed: true +status: 4 +""" + records = parse(text) + assert len(records) == 2 + assert isinstance(records["header_a"].result, SanitizationError) + assert isinstance(records["header_b"].result, 
ExecutedError) + + def test_three_records_with_trailing_separator(self): + text = """a:\nsanitization_error: true\nstatus: 1\n +-------------------- +b:\nsanitization_error: true\nstatus: 2\n +-------------------- +c:\nsanitization_error: true\nstatus: 3\n +-------------------- +""" + records = parse(text) + assert len(records) == 3 + + def test_preserves_order(self): + text = "bbb:\nsanitization_error: true\nstatus: 1\n\n--------------------\naaa:\nsanitization_error: true\nstatus: 2\n" + records = parse(text) + assert list(records.keys()) == ["bbb", "aaa"] + + +class TestVariantDiscrimination: + """Test that the correct result variant is constructed.""" + + def test_sanitization_error_variant(self): + text = "h:\nsanitization_error: true\nstatus: 9\ninstruction_error: 3\n" + r = parse(text)["h"] + assert isinstance(r.result, SanitizationError) + assert r.result.status == 9 + assert r.result.instruction_error == 3 + + def test_executed_success_variant(self): + text = """h: +executed: true +is_ok: true +executed_units: 1000 +fee_details { + transaction_fee: 5000 +} +modified_accounts { + address: "Addr1" + owner: "Owner1" +} +""" + r = parse(text)["h"] + assert isinstance(r.result, ExecutedSuccess) + assert r.result.fee_details.transaction_fee == 5000 + assert r.result.executed_units == 1000 + assert len(r.result.modified_accounts) == 1 + + def test_executed_error_variant(self): + text = """h: +executed: true +status: 9 +instruction_error: 8 +custom_error: 42 +fee_details { + transaction_fee: 15000 +} +""" + r = parse(text)["h"] + assert isinstance(r.result, ExecutedError) + assert r.result.status == 9 + assert r.result.instruction_error == 8 + assert r.result.custom_error == 42 + + def test_sanitization_error_has_no_fee_or_accounts(self): + text = "h:\nsanitization_error: true\nstatus: 27\n" + r = parse(text)["h"] + assert isinstance(r.result, SanitizationError) + assert not hasattr(r.result, "fee_details") + assert not hasattr(r.result, "modified_accounts") + + +class TestParseFile: + """Test file parsing against the actual data files.""" + + def test_parse_parseme1_record_count(self): + records = parse_file(TestParse.expected()) + assert len(records) == 5244 + for header, r in records.items(): + assert header != "" + assert r.test_id == header + assert r.result is not None + + def test_parse_parseme2_record_count(self): + records = parse_file(TestParse.actual()) + assert len(records) == 5244 + + def test_parseme1_first_record(self): + records = parse_file(TestParse.expected()) + first_key = next(iter(records)) + assert first_key == "00dc63d6f5824efa900302649eeca7983e5d3f5a_1834424" + r = records[first_key] + assert isinstance(r.result, SanitizationError) + assert r.result.status == 9 + assert r.result.instruction_error == 3 + + def test_parseme1_second_record(self): + records = parse_file(TestParse.expected()) + keys = list(records.keys()) + r = records[keys[1]] + assert isinstance(r.result, ExecutedError) + assert r.result.status == 9 + assert r.result.fee_details.transaction_fee == 15000 + assert r.result.loaded_accounts_data_size == 26626 + assert len(r.result.modified_accounts) == 7 + assert r.result.modified_accounts[0].address == "AgzPoim5Zy7s9YaGEvxUzGg5UbrSgsPXMRfebjixXBv4" + assert r.result.modified_accounts[5].data == "...137 zeros..." 
+ assert len(r.result.rollback_accounts) == 1 + + def test_parseme1_third_record_is_ok(self): + records = parse_file(TestParse.expected()) + keys = list(records.keys()) + r = records[keys[2]] + assert isinstance(r.result, ExecutedSuccess) + assert r.result.executed_units == 1306 + assert r.result.fee_details.transaction_fee == 10000 + + def test_parseme1_fourth_record_prioritization_fee(self): + records = parse_file(TestParse.expected()) + keys = list(records.keys()) + r = records[keys[3]] + assert isinstance(r.result, (ExecutedSuccess, ExecutedError)) + assert r.result.fee_details.prioritization_fee == 1152604553296817704 + + def test_parseme1_has_all_header_types(self): + records = parse_file(TestParse.expected()) + assert "crash-0ae873089c62df6e15d3fc0a271485da355874c8" in records + assert "aa7aa005-ba12-4d08-935d-18ed0bc1a54f" in records + assert "is_signer_err_handling" in records + + def test_parseme1_variant_counts(self): + records = parse_file(TestParse.expected()) + san = sum(1 for r in records.values() if isinstance(r.result, SanitizationError)) + ok = sum(1 for r in records.values() if isinstance(r.result, ExecutedSuccess)) + err = sum(1 for r in records.values() if isinstance(r.result, ExecutedError)) + assert san + ok + err == len(records) + assert san > 0 and ok > 0 and err > 0 + + def test_parseme1_all_success_have_modified_accounts(self): + records = parse_file(TestParse.expected()) + for r in records.values(): + if isinstance(r.result, ExecutedSuccess): + assert len(r.result.modified_accounts) > 0 diff --git a/conformance/parseout/pyproject.toml b/conformance/parseout/pyproject.toml new file mode 100644 index 0000000000..28d57eb20a --- /dev/null +++ b/conformance/parseout/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "parseout" +version = "0.1.0" +requires-python = ">=3.10" +readme = "README.md" + +[project.scripts] +parseout = "parseout.__main__:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.pytest.ini_options] +python_files = ["parser.py", "differ.py", "fixture.py"] diff --git a/conformance/run.py b/conformance/run.py index cf03ef66c4..be6ee33619 100755 --- a/conformance/run.py +++ b/conformance/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3.11 +#!/usr/bin/env python3.13 import argparse import json @@ -9,7 +9,7 @@ from collections import defaultdict -conformance_dir = os.path.dirname(os.path.abspath(__file__)) +conformance_dir = os.path.dirname(os.path.realpath(__file__)) try: from typer.testing import CliRunner @@ -18,13 +18,9 @@ print( textwrap.dedent( f""" - To create the environment: - - {conformance_dir}/scripts/setup-env.sh - To activate the environment: - source {conformance_dir}/env/pyvenv/bin/activate + nix develop """ ) @@ -51,7 +47,6 @@ def main(): parser.add_argument("--no-run", action="store_true", help="Don't exec fixtures (only create)") parser.add_argument("-v", "--verbose", action="store_true", help="Print commands and output") parser.add_argument("--filter", help="Filter for fixture to execute") - parser.add_argument("--save", help="File to save results in json") parser.add_argument( "--create-lib", default=create_lib, @@ -70,6 +65,11 @@ def main(): help="Run each fixture with a separate invocation of solana-test-suite. 
" "This makes it easier to see which fixture caused a panic, but takes longer.", ) + parser.add_argument( + "--use-created", + action="store_true", + help="Run with the expectation from the generated fixtures instead of vectors.", + ) config = parser.parse_args() @@ -100,9 +100,19 @@ def main(): print(f"\tFailed: {total_failed}") print(f"\tSkipped: {total_skipped}") - if config.save: - with open(config.save, "w") as f: - json.dump(results, f, indent=4) + with open("env/test-outputs/results.json", "w") as f: + json.dump(results, f, indent=4) + + failures = [] + for result in results: + for fixture in result.get("failed_fixtures", []): + failures.append(os.path.join(result["name"], fixture + ".fix\n")) + failures.sort() + + with open("env/test-outputs/failing.txt", "w") as f: + f.writelines(failures) + + print(f"\nDetailed test results saved to env/test-outputs/\n") def path(path): @@ -117,11 +127,12 @@ def run_test(vectors, config, pad): print_noln(f"{vectors:<{pad}}") vectors_path = path(f"env/test-vectors/{vectors}") - fixtures_path = path(f"env/test-fixtures/{vectors}") + created_path = path(f"env/created-fixtures/{vectors}") + to_run_path = created_path if config.use_created or config.create else vectors_path outputs_folder = os.path.dirname(vectors) if vectors.endswith(".fix") else vectors outputs_path = path(f"env/test-outputs/{outputs_folder}") - if not os.path.exists(vectors_path): - os.makedirs(fixtures_path, exist_ok=True) + if not os.path.exists(created_path): + os.makedirs(created_path, exist_ok=True) if config.create: @@ -130,7 +141,7 @@ def run_test(vectors, config, pad): "create-fixtures", "--num-processes", config.num_processes, "-i", vectors_path, - "-o", fixtures_path, + "-o", created_path, "-s", config.create_lib, ], verbose=config.verbose, @@ -153,11 +164,11 @@ def run_test(vectors, config, pad): "skipped": 0, "failed_fixtures": [], } - filenames = sorted(os.listdir(fixtures_path)) + filenames = sorted(os.listdir(to_run_path)) pad = max(len(os.path.join(vectors, f)) for f in filenames) for fixture in filenames: print_noln(f"{os.path.join(vectors, fixture):<{pad}}") - fixture_path = os.path.join(fixtures_path, fixture) + fixture_path = os.path.join(to_run_path, fixture) one_result = exec_fixtures(config, fixture_path, outputs_path) result["passed"] += one_result.get("passed", 0) result["failed"] += one_result.get("failed", 0) @@ -165,7 +176,7 @@ def run_test(vectors, config, pad): result["failed_fixtures"].extend(one_result.get("failed_fixtures", [])) return result else: - result = exec_fixtures(config, fixtures_path, outputs_path) + result = exec_fixtures(config, to_run_path, outputs_path) result["name"] = vectors return result @@ -183,21 +194,24 @@ def exec_fixtures(config, fixtures_path, outputs_path): ) # fmt: on - if result.exit_code != 0: - return {"failed": 1} - - summary = result.stdout.split("\n")[3] - passed = int(summary.split(",")[0].split(": ")[1]) - failed = int(summary.split(",")[1].split(": ")[1]) - skipped = int(summary.split(",")[2].split(": ")[1]) + passed = 0 + failed = 1 # assume failure unless there is a good output + skipped = 0 + failed_fixtures = [] + if result.exit_code == 0: + try: + summary = result.stdout.split("\n")[3] + passed = int(summary.split(",")[0].split(": ")[1]) + failed = int(summary.split(",")[1].split(": ")[1]) + skipped = int(summary.split(",")[2].split(": ")[1]) + if failed > 0: + failed_fixtures = result.stdout.split("\n")[4].strip("Failed tests: ").strip() + failed_fixtures = json.loads(failed_fixtures.replace("'", '"')) + 
except IndexError: + pass # reached when the harness panics print(f" │ Pass{passed:>5} │ Fail{failed:>5} │ Skip{skipped:>5}") - failed_fixtures = [] - if failed > 0: - failed_fixtures = result.stdout.split("\n")[4].strip("Failed tests: ").strip() - failed_fixtures = json.loads(failed_fixtures.replace("'", '"')) - return { "passed": passed, "failed": failed, diff --git a/conformance/scripts/ci-run.sh b/conformance/scripts/ci-run.sh index 138c48c236..e8564fba81 100755 --- a/conformance/scripts/ci-run.sh +++ b/conformance/scripts/ci-run.sh @@ -1,61 +1,33 @@ #!/usr/bin/env bash +# # This script runs the `solana_conformance` step in CI. + set -euxo pipefail -PREBUILT_LIB_DIR=${PREBUILT_LIB_DIR:-../workspace/conformance-release/lib} +SPLIT_TESTS=${SPLIT_TESTS:-false} +PREBUILT_LIB_DIR=${PREBUILT_LIB_DIR:-zig-out/lib} NUM_THREADS=${NUM_THREADS:-$(nproc || sysctl -n hw.ncpu || echo 1)} conformance_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" cd $conformance_dir -. commits.env - -echo Setup solana-conformance -sudo add-apt-repository ppa:deadsnakes/ppa -y -sudo apt install -y python3.11 python3.11-dev python3.11-venv rename gcc -scripts/setup-env.sh get-solana-conformance - -echo Get test fixtures -wget https://github.com/Syndica/conformance-fixtures/releases/download/test-vectors-$TEST_VECTORS_COMMIT-solfuzz-agave-$SOLFUZZ_AGAVE_COMMIT/fixtures.tar.zst -tar xf fixtures.tar.zst -# We need each fixture file to have a unique name so that we can put them into the test-inputs directory -# in a "flat" manner. The problem is that there are just too many files and it exceeds ARG_MAX for a single -# "rename" invocation. So we need to go the round-about method of first finding the files then using "xargs" -# to chunk it MAX-ARGS. Testing shows that just using the maximum number from xargs is around twice as fast -# as specifying an arg limit manually. 
-find test-fixtures/vm_interp/fixtures/v0 -type f -name '*.fix' -print0 | xargs -0 rename "s/\.fix\$/-v0.fix/"
-find test-fixtures/vm_interp/fixtures/v1 -type f -name '*.fix' -print0 | xargs -0 rename "s/\.fix\$/-v1.fix/"
-find test-fixtures/vm_interp/fixtures/v2 -type f -name '*.fix' -print0 | xargs -0 rename "s/\.fix\$/-v2.fix/"
-
-echo Parse fixture lists
-PASSING_DIRS=()
-
-set +x
-while IFS= read -r line; do
-    if [[ -n "$line" ]]; then
-        PASSING_DIRS+=("$line")
-    fi
-done < "scripts/fixtures.txt"
-
-FIXTURES=()
-
-for dir in "${PASSING_DIRS[@]}"; do
-    while IFS= read -r -d '' file; do
-        FIXTURES+=("$file")
-    done < <(find "test-fixtures/$dir" -type f -name '*.fix' -print0)
-done
-
-mkdir -p split-fixtures/
-printf "%s\n" "${FIXTURES[@]}" \
-    | circleci tests split \
-    | xargs -d '\n' -I{} cp "{}" split-fixtures/
-set -x
-
-export LD_PRELOAD=/lib/x86_64-linux-gnu/libasan.so.8
-export ASAN_OPTIONS=detect_leaks=0
-echo "Running fixtures"
-env/pyvenv/bin/solana-conformance exec-fixtures \
+echo Selecting a subset of fixtures to run
+rm -rf env/split-fixtures/
+mkdir -p env/split-fixtures/
+# Finds all .fix files in test-vectors, excludes known failures, splits across
+# CI nodes, and copies the selected subset into split-fixtures
+comm -23 \
+    <(find "env/test-vectors/" \
+        -path 'env/test-vectors/block/*' -prune \
+        -o -type f -name '*.fix' -printf '%P\n' | sort) \
+    <(sort scripts/failing.txt) \
+    | if [ "$SPLIT_TESTS" == "true" ]; then circleci tests split; else cat; fi \
+    | sed 's_^_env/test-vectors/_' \
+    | xargs -d '\n' cp -t env/split-fixtures/
+
+echo Running fixtures
+solana-conformance exec-fixtures \
     --num-processes $NUM_THREADS \
     -t ${PREBUILT_LIB_DIR}/libsolfuzz_sig.so \
-    -o test_results/ \
-    -i split-fixtures/ | tee /dev/tty | grep -q "Failed: 0,"
+    -o env/test_results/ \
+    -i env/split-fixtures/ | tee /dev/tty | grep -q "Failed: 0,"
diff --git a/conformance/scripts/failing.txt b/conformance/scripts/failing.txt
index 8b13789179..90296b1545 100644
--- a/conformance/scripts/failing.txt
+++ b/conformance/scripts/failing.txt
@@ -1 +1,182 @@
-
+instr/fixtures/zk_sdk/a0_7f8c0067c3ad9e1b.fix
+instr/fixtures/zk_sdk/a11_d7cdabf3af12487d.fix
+instr/fixtures/zk_sdk/a13_379ab95b603bfbe4.fix
+instr/fixtures/zk_sdk/a15_dcddbae46a61d865.fix
+instr/fixtures/zk_sdk/a16_3280762193911d15.fix
+instr/fixtures/zk_sdk/a17_9351289cb0ab6c49.fix
+instr/fixtures/zk_sdk/a18_3fba1553127beac4.fix
+instr/fixtures/zk_sdk/a2_dc1311dcafd818d7.fix
+instr/fixtures/zk_sdk/a3_8b05dffa3991ce20.fix
+instr/fixtures/zk_sdk/a4_bb2336989dc4a34d.fix
+instr/fixtures/zk_sdk/a5_5bd139ba8c43e199.fix
+instr/fixtures/zk_sdk/a6_3c257db2305d61d7.fix
+instr/fixtures/zk_sdk/a7_89497c02cf43312b.fix
+instr/fixtures/zk_sdk/a8_ee93b2bad194edd6.fix
+instr/fixtures/zk_sdk/a9_4920e8ff8149d60f.fix
+instr/fixtures/zk_sdk/acc0_bebebc0c335d0027.fix
+instr/fixtures/zk_sdk/acc11_4671635d91d771c3.fix
+instr/fixtures/zk_sdk/acc13_779e4724849e5d1f.fix
+instr/fixtures/zk_sdk/acc15_69ecb9c439a0a3d3.fix
+instr/fixtures/zk_sdk/acc16_22f4789faf31013a.fix
+instr/fixtures/zk_sdk/acc17_d580e07deb9979eb.fix
+instr/fixtures/zk_sdk/acc18_222fa5dec00bd6df.fix
+instr/fixtures/zk_sdk/acc2_25bbad7adb043902.fix
+instr/fixtures/zk_sdk/acc3_93638051fe9a9032.fix
+instr/fixtures/zk_sdk/acc4_346b401eede57b23.fix
+instr/fixtures/zk_sdk/acc5_9f182deb9212323a.fix
+instr/fixtures/zk_sdk/acc6_c14d3513e55b3462.fix
+instr/fixtures/zk_sdk/acc7_692fb904040751ff.fix
+instr/fixtures/zk_sdk/acc8_9a740675ffb453dc.fix
+instr/fixtures/zk_sdk/acc9_64c38631c7cc437d.fix
+instr/fixtures/zk_sdk/verif0_a29b1e9d744344a8.fix +instr/fixtures/zk_sdk/verif10_bfdef4b3ba4e391e.fix +instr/fixtures/zk_sdk/verif11_1dbab83a16fe6a1b.fix +instr/fixtures/zk_sdk/verif12_a7048539b4b8e411.fix +instr/fixtures/zk_sdk/verif13_dfa26efa65994081.fix +instr/fixtures/zk_sdk/verif14_f02f235aadf7d4e5.fix +instr/fixtures/zk_sdk/verif15_807420250b9780e0.fix +instr/fixtures/zk_sdk/verif16_2c0a2aaa7e50895e.fix +instr/fixtures/zk_sdk/verif17_fa91301db4ac9aa0.fix +instr/fixtures/zk_sdk/verif18_d71f9e54266c9636.fix +instr/fixtures/zk_sdk/verif19_eb8a282364165fc5.fix +instr/fixtures/zk_sdk/verif20_ed4d78e940a37141.fix +instr/fixtures/zk_sdk/verif21_2e343a6532d14d06.fix +instr/fixtures/zk_sdk/verif22_d17e45ba8032fa69.fix +instr/fixtures/zk_sdk/verif23_613434c5fc608a19.fix +instr/fixtures/zk_sdk/verif24_e867c8d239810c2a.fix +instr/fixtures/zk_sdk/verif25_3f2701ce1bf35455.fix +instr/fixtures/zk_sdk/verif5_be19352e8c9e7991.fix +instr/fixtures/zk_sdk/verif6_f8b5d5099faa223c.fix +instr/fixtures/zk_sdk/verif9_d7810f7a7111987a.fix +instr/fixtures/zk_sdk/w0_8ec5005274801773.fix +instr/fixtures/zk_sdk/w11_19df8196a540fec2.fix +instr/fixtures/zk_sdk/w13_21ed12da0cc9b04b.fix +instr/fixtures/zk_sdk/w15_09e751443b873cf7.fix +instr/fixtures/zk_sdk/w16_a06c5b45412cac74.fix +instr/fixtures/zk_sdk/w17_e882a520e3faf5cc.fix +instr/fixtures/zk_sdk/w18_de611fd4e365b353.fix +instr/fixtures/zk_sdk/w2_ab09648c62fc0677.fix +instr/fixtures/zk_sdk/w3_2b66de454d6fa439.fix +instr/fixtures/zk_sdk/w4_9f8009abcc7f4e4a.fix +instr/fixtures/zk_sdk/w5_2e5b3199f8d5b61c.fix +instr/fixtures/zk_sdk/w6_00a5ddd8b671cc85.fix +instr/fixtures/zk_sdk/w7_a0e0c80764d4d7e9.fix +instr/fixtures/zk_sdk/w8_2d9ce48583a46bee.fix +instr/fixtures/zk_sdk/w9_181202ef9796e3c7.fix +instr/fixtures/zk_sdk/wacc0_4565bf244f7d3c94.fix +instr/fixtures/zk_sdk/wacc11_6ea7c70a4400aa1e.fix +instr/fixtures/zk_sdk/wacc13_690bdf1be9ab6b1f.fix +instr/fixtures/zk_sdk/wacc15_ae77b943bc1ccebe.fix +instr/fixtures/zk_sdk/wacc16_7da88539580c7826.fix +instr/fixtures/zk_sdk/wacc17_ad6c57f722bcf5b1.fix +instr/fixtures/zk_sdk/wacc18_a2d8de38b14b1c68.fix +instr/fixtures/zk_sdk/wacc2_4d2624adb43ecdf4.fix +instr/fixtures/zk_sdk/wacc3_7e9654124e2af720.fix +instr/fixtures/zk_sdk/wacc4_fa6d9b4c086d5a97.fix +instr/fixtures/zk_sdk/wacc5_6a9b1525cf8f5452.fix +instr/fixtures/zk_sdk/wacc6_5f3bf70e8572e52b.fix +instr/fixtures/zk_sdk/wacc7_a08f10406aee35a4.fix +instr/fixtures/zk_sdk/wacc8_4370fd00b238a5a0.fix +instr/fixtures/zk_sdk/wacc9_d13848d723bd8cfa.fix +txn/fixtures/006cec687919df8ddc4fd111e60e4864c7a94a79_2288978.fix +txn/fixtures/0179c4cbb9a7110eb246247ca09517720f960904_2194958.fix +txn/fixtures/021f0cfecbd5edd05af52b24c1374d5dd13c36c0_3213804.fix +txn/fixtures/0a4807f84f6dd1c2760b3712df3846e5aa930906_2410587.fix +txn/fixtures/0b6e7a7ed010c514e7b2e121988380ce84e61d2e_2655582.fix +txn/fixtures/0df6674eadbc1117e90b2ce85190f56fd773a58b_2879459.fix +txn/fixtures/0f44c4f29823b842e36eaf773bfea339e33aecb0_2692287.fix +txn/fixtures/0fb4a1d8c2d58a0a97b81b18880cae0e46b8a360_265678.fix +txn/fixtures/0fbf14533d0939462c80b5b77920a3e9b9867dff_2213556.fix +txn/fixtures/12643da37bc7cedeaec9a7c2bc4ac191417a7170_2195021.fix +txn/fixtures/130b22b824fc407eef7dc5b75d398787f58d2c15_265678.fix +txn/fixtures/142f3b2b185b954e36696311b3b84744508a80d5_2361208.fix +txn/fixtures/18b4e56da90940d580612d378203e29ab87cc63e_3090691.fix +txn/fixtures/1a3584a220639d46eb8b03a7069ca7d3796b8659_2372912.fix +txn/fixtures/1a4aec43fd827c9a084dc5fe267c0b3f968a9fb8_2235143.fix 
+txn/fixtures/1a729174d141352354cb70c0e74bd0521c17888e_1656878.fix +txn/fixtures/1fff2d993f4d7677640b2b3397320496b076e854_3506222.fix +txn/fixtures/209c7df958b10b061a8bfe3f6725c4b8802feb77_3200920.fix +txn/fixtures/20f2b0e913522ed8eaa8afebd362146105f1dfe7_3126852.fix +txn/fixtures/21ec480f4864655312f3b116cc2ed26a868d5d26_2953099.fix +txn/fixtures/21fb4857674201e9f6c173c1f2e83556718c401e_1556049.fix +txn/fixtures/2a159c214135a36023fae14efcfbecd62b230823_2631802.fix +txn/fixtures/2aaf16427235cc0a1cf9c9ff7fe902fae3c10cc6_2767000.fix +txn/fixtures/306012c394c4f582374f72eb1f191d06de6e4ea6_2522260.fix +txn/fixtures/3095efec5218f66db3fd3765b672bfeec6362b2c_2546518.fix +txn/fixtures/324154e969e4a719afe7b079a559f0e59ab3f748_2510065.fix +txn/fixtures/326f6aa740e4e6060cc4fc272f8d1e5173a0347a_2484212.fix +txn/fixtures/32ceb6737de9a3108558bc59bd1d2299402e1759_2187779.fix +txn/fixtures/338a0b377eb143b9d2520fd81d6b8658ea71d32a_1183730.fix +txn/fixtures/3799b046a3717e43b056b8724374c468336b9d17_3177139.fix +txn/fixtures/3b45ebbb089bb436aafb68056077413f7da53a2c_2336991.fix +txn/fixtures/3b6d03c12d620a69d7ac3e69dbecf486f5fcb2b4_2855179.fix +txn/fixtures/4434bab1dd322682afd18b96c06554aca3ce0d8d_3001829.fix +txn/fixtures/44b6d4ad676c785f99cccfeffca43f73f8834c84_2213508.fix +txn/fixtures/45dd4971a31ae58798211458d14551780be22c7a_1633256.fix +txn/fixtures/4ab2840c81a2bad348c7c5ecbb632148264f8950_3078035.fix +txn/fixtures/4f391d783f130b20ff187e266caeea545b3bee1e_2817460.fix +txn/fixtures/5103b2cca444d542d930b64acaaf3a605f64ab73_2213737.fix +txn/fixtures/542e30d2e06079f3ddcb767c58ac5463618bcd07_2619392.fix +txn/fixtures/55b9b95d1d6ce9783d3ed11f9f53695363a9c701_1655830.fix +txn/fixtures/55e8b5cda33cfe673e05331d2c9e0fe7eaad6e21_2196906.fix +txn/fixtures/5e0a5f5573e108afc7d427dc5bf04f397461a297_3102855.fix +txn/fixtures/61308114e7370765a28bf524e5a1ed0d6cefd5ac_2754507.fix +txn/fixtures/683098a43650542407f5e2673dcbed1ff761c745_2423046.fix +txn/fixtures/6b8aad11eda157f83dab534e8066a49ed74ba761_2460271.fix +txn/fixtures/6c7485b4807cedf24a234fdf455ae9d2603508c1_2583468.fix +txn/fixtures/6ed605260361eed1bcd6d50676fd4f10a34cf779_2717633.fix +txn/fixtures/745eb2958bca9d8abbd5ead8839c53632104e89b_2398324.fix +txn/fixtures/74f9f1dda20c9d76b349e7a45be78e25e133b987_2729628.fix +txn/fixtures/7bc6797c6c983885b1a15dd876e195f7014a1ccc_2705158.fix +txn/fixtures/805e0cd05df6ceb015a08086e9bcfab51f559747_180823.fix +txn/fixtures/81a6be81485f9bdb807fdf7fc81b2782c512d7a6_2196843.fix +txn/fixtures/849d28ffcec100ce4572d341bc9fe4f74fb9d0b7_2140423.fix +txn/fixtures/84f3a1500cd82eb29554c49640400e9d96783dc6_2395779.fix +txn/fixtures/850384bbd4d1b8c25de0c7516cc4b823801ee570_265678.fix +txn/fixtures/8c00535b747c262681aa014f688dafb22c34546a_1631431.fix +txn/fixtures/8cc0cafd6602956e0c3fc35c1844578785b9337f_2312950.fix +txn/fixtures/8d58fe7ee312bd41d25b223ece2e632441cb7fa9_2292395.fix +txn/fixtures/900e0f020d6c8d0915ed19a5c54adafd979a3390_2277057.fix +txn/fixtures/9be1ddb02934e8c8876427e2b074a5051f713a32_1656662.fix +txn/fixtures/9c42ab0d666a2d86cb138bf0e972e845666782a1_2448028.fix +txn/fixtures/a2ea969d9af56dc3ac5ebe3b7bfc2bd544fec487_3164284.fix +txn/fixtures/a40a338847d331360470dc22de6d431944ef950c_3014147.fix +txn/fixtures/a7727947178b8fac2f917d7eaf1b6cc5ff37aa1b_2792063.fix +txn/fixtures/a81b5ff9ac6a841103606d5c7aee49ba99d597b7_2867137.fix +txn/fixtures/a8d497234c17b71302e6c69c152e154ae6529ebe_2137938.fix +txn/fixtures/aa41354131885b471f301100e5a614a16b21cbc4_3139208.fix +txn/fixtures/abd3e18d9c1672c7ab644be17ae30c921e72c4dd_1631283.fix 
+txn/fixtures/b252f2a3680a0649e9acad79001c028234000d0e_2301141.fix +txn/fixtures/b49ee10e80e1b8f61cb8e921f05248887bf8f547_2210077.fix +txn/fixtures/b5feb69c36929915f3d24aaa79cd5786eee5aad7_1047123.fix +txn/fixtures/b91c038fe6bc76b330f4ae8719a9e8c47a8d28ed_434577.fix +txn/fixtures/bbcdc9d246f7a7af53e1683e9ee51599f1bf2513_2384774.fix +txn/fixtures/bf70c3db4ecc7db10de91aec692078da50bd8612_2977975.fix +txn/fixtures/c3319fc3a5c525a7612fb139ddd49862e01fd909_2766868.fix +txn/fixtures/cab41c4453c9760b32a6015be5823cce3750b032_2009105.fix +txn/fixtures/cea9e470d627d36509718775fbe4b2585b9d44c1_2679940.fix +txn/fixtures/crash-eb9e4a94882cda840fa4d8d8a390564ad7443ee2.fix +txn/fixtures/d07b9d0ebc285fb6446aa45e14c69972a7e42cce_2842091.fix +txn/fixtures/d07c388a409ef2e239e7fbb38a1974564a1efd28_2990044.fix +txn/fixtures/d0bc26bbc043d91e92fa6386b54557347ff4b971_3052099.fix +txn/fixtures/d4a5ca366478576979ac7d454fa9b3a82e411642_2892039.fix +txn/fixtures/d4af641fd573e18924936d96308e49ebd849da9d_2138001.fix +txn/fixtures/d608421f8e583e766d30ccf57edb4e71c224a99d_2137433.fix +txn/fixtures/d67f442716145cf5645900048f5d35466cc88ac5_1631585.fix +txn/fixtures/da6b4a8a98d554f0ae230a8a3c2c13d3b6075e4f_2435227.fix +txn/fixtures/daea754f668170f42ae2d5fb4e8a62ba4ecaf2a2_265678.fix +txn/fixtures/dc1ec0a63c4f7e278ccf01a282dcca8c48fb07c5_3038936.fix +txn/fixtures/dfc4e91f6924adb421b44c8a311c4bc1a0d4dd90_265678.fix +txn/fixtures/e40b8950d4fa05015bb55eca6844516f77f7e5e0_2941208.fix +txn/fixtures/e447bfcf38d7be9d28c994fe8cbd3ea09820acc5_265678.fix +txn/fixtures/e52d51ce0cfd30e401db4051cdbd3fff6f8a2804_2964935.fix +txn/fixtures/e7ea7ace535e604a8e6ee07cf791292490bd29e3_3882152.fix +txn/fixtures/e852802fb52344637279d37a5a04cefeb62c5b99_2325257.fix +txn/fixtures/e8b60fde5331ac70a2bf831fa0cab4a3e1d2dc26_2471891.fix +txn/fixtures/ea4712553ea96d1145b4c66f12f7cb29d1aca394_2140488.fix +txn/fixtures/ea61f1835dbf6f41385197459b9396f66e3af963_2210014.fix +txn/fixtures/f0322421d97e91c1a247d6a978bae2e30b51dab3_2668441.fix +txn/fixtures/f731c7a693492de08dca9d1b665ba3fb204c7218_3026660.fix +txn/fixtures/faa94a86ce1c305065c1eab52d5d9695d3d4bf12_2829813.fix +txn/fixtures/fb1305b2e30e46cc2a321004c061888e84953432_1657414.fix +txn/fixtures/fd4262cdc8011b36f47747a0af84aaeb3f3650dc_265678.fix diff --git a/conformance/scripts/fixtures.txt b/conformance/scripts/fixtures.txt index 35a904473a..1f97ebc56c 100644 --- a/conformance/scripts/fixtures.txt +++ b/conformance/scripts/fixtures.txt @@ -38,11 +38,4 @@ syscall/fixtures/sol_get_sysvar syscall/fixtures/stack_height syscall/fixtures/try_find_program_address syscall/fixtures/vm -txn/fixtures/precompile/ed25519 -txn/fixtures/precompile/secp256k1 -txn/fixtures/precompile/secp256r1 -txn/fixtures/programs -vm_interp/fixtures/latest -vm_interp/fixtures/v0 -vm_interp/fixtures/v1 -vm_interp/fixtures/v2 +txn/fixtures diff --git a/conformance/scripts/setup-env.sh b/conformance/scripts/setup-env.sh deleted file mode 100755 index 0987956be5..0000000000 --- a/conformance/scripts/setup-env.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -conformance_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -env="$conformance_dir/env" -. "$conformance_dir/commits.env" - -get-repo-at-commit() { - local repo_url=$1 - local commit=$2 - local dir="$3" - - mkdir -p "$env" - if [[ ! 
-d "$dir" ]]; then - echo "Cloning $repo_url at $commit" - if git clone --revision=$commit --depth=1 $repo_url "$dir"; then - return 0 - else - git clone $repo_url "$dir" - fi - fi - echo "Resetting $dir to $commit" - pushd "$dir" - git fetch origin $commit - git reset --hard $commit - popd -} - -full-setup() { - if [[ -d "$env" ]]; then - echo Any local changes you have to solana-conformance, solfuzz-agave, or test-vectors will be deleted. - read -p "Do you want to continue? [y/N]: " confirm - if [[ ! "$confirm" =~ ^[Yy]$ ]]; then - echo "Aborted." - exit 1 - fi - fi - - get-solfuzz-agave - get-test-vectors - get-solana-conformance -} - -get-solfuzz-agave() { - get-repo-at-commit \ - https://github.com/firedancer-io/solfuzz-agave.git \ - $SOLFUZZ_AGAVE_COMMIT \ - "$env/solfuzz-agave" - - get-repo-at-commit \ - https://github.com/firedancer-io/protosol.git \ - $AGAVE_PROTOSOL_COMMIT \ - "$env/solfuzz-agave/protosol" - - # build vendored protoc and flatc - pushd "$env/solfuzz-agave/protosol" - git submodule update --init --recursive - ./deps.sh - popd - - export PROTOC_EXECUTABLE="$env/solfuzz-agave/protosol/opt/bin/protoc" - export FLATC_EXECUTABLE="$env/solfuzz-agave/protosol/opt/bin/flatc" - - pushd "$env/solfuzz-agave" - cargo build --lib --release - popd -} - -get-test-vectors() { - get-repo-at-commit \ - https://github.com/firedancer-io/test-vectors.git \ - $TEST_VECTORS_COMMIT \ - "$env/test-vectors" -} - -get-solana-conformance() { - get-repo-at-commit \ - https://github.com/firedancer-io/solana-conformance.git \ - $SOLANA_CONFORMANCE_COMMIT \ - "$env/solana-conformance" - - # set up the python venv to run solana conformance - python3.11 -m venv "$env/pyvenv" - source "$env/pyvenv/bin/activate" - - pushd "$env/solana-conformance" - pip install -e ".[dev]" - pre-commit install - popd - - cat <<-EOF - - Local environment created successfully. To activate it, run: - - source $env/pyvenv/bin/activate - -EOF -} - -if [ $# -eq 0 ]; then - full-setup -else - "$@" -fi diff --git a/conformance/scripts/verify-commits.sh b/conformance/scripts/verify-commits.sh deleted file mode 100755 index 09feb52c77..0000000000 --- a/conformance/scripts/verify-commits.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# This script is a basic sanity check that the commit hashes in commits.env are -# consistent with our CI configuration. It's not perfect but checks for some -# obvious inconsistencies. - -trap 'echo ❌ Failed to verify conformance commit at $LINENO: $BASH_COMMAND' ERR - -# ensure working directory is repository root -cd $(dirname "${BASH_SOURCE[0]}")/../.. - -. conformance/commits.env - -grep "// current commit: $SIG_PROTOSOL_COMMIT" conformance/build.zig > /dev/null - -echo ✅ Verified conformance commits. 
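The next two diffs (elf_loader.zig and the new elf_types.zig) replace the generated protobuf codec for the ELF loader harness with a hand-rolled FlatBuffers reader and writer. As a minimal sketch of the vtable indirection that elf_types.zig's `VTable.decode` and `field` helpers walk (assuming the standard little-endian FlatBuffers layout; the buffer below is hand-built for illustration and is not part of the patch):

```zig
const std = @import("std");

test "flatbuffers vtable walk" {
    // Hand-built buffer: a root uoffset points at the table; the table
    // starts with an i32 soffset pointing back to its vtable; the vtable
    // holds its own size, the table's inline size, then one u16 offset
    // per field.
    var buf = [_]u8{0} ** 20;
    std.mem.writeInt(u32, buf[0..4], 12, .little); // root uoffset -> table at byte 12
    std.mem.writeInt(u16, buf[4..6], 6, .little); // vtable size (2 + 2 + 1 field * 2)
    std.mem.writeInt(u16, buf[6..8], 8, .little); // table inline size
    std.mem.writeInt(u16, buf[8..10], 4, .little); // field 0 lives at table + 4
    std.mem.writeInt(i32, buf[12..16], 8, .little); // soffset: table(12) - vtable(4)
    buf[16] = 42; // field 0 value

    const table: usize = std.mem.readInt(u32, buf[0..4], .little);
    const soff = std.mem.readInt(i32, buf[12..16], .little);
    const vtable: usize = @intCast(@as(i64, @intCast(table)) - soff);
    const field0 = std.mem.readInt(u16, buf[vtable + 4 ..][0..2], .little);
    try std.testing.expectEqual(@as(u8, 42), buf[table + field0]);
}
```

A zero entry in the vtable (or an entry past the vtable's recorded size) means the field is absent, which is why the patch's `VTable.field` returns null in those cases.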
diff --git a/conformance/src/elf_loader.zig b/conformance/src/elf_loader.zig
index 7ef7e365fb..07ab1790e8 100644
--- a/conformance/src/elf_loader.zig
+++ b/conformance/src/elf_loader.zig
@@ -1,55 +1,56 @@
const std = @import("std");
const sig = @import("sig");
-const pb = @import("proto/org/solana/sealevel/v1.pb.zig");
+const types = @import("elf_types.zig");
-const ELFLoaderCtx = pb.ELFLoaderCtx;
-const ElfLoaderEffects = pb.ELFLoaderEffects;
+const ELFLoaderCtx = types.ELFLoaderCtx;
+const ElfLoaderEffects = types.ELFLoaderEffects;
const svm = sig.vm;
const elf = svm.elf;
-export fn sol_compat_elf_loader_v1(
+export fn sol_compat_elf_loader_v2(
    out_ptr: [*]u8,
    out_size: *u64,
    in_ptr: [*]const u8,
    in_size: u64,
) i32 {
-    errdefer |err| std.debug.panic("err: {s}", .{@errorName(err)});
-    const allocator = std.heap.c_allocator;
+    testAndHandleIO(out_ptr, out_size, in_ptr, in_size) catch |e| {
+        std.debug.print("error: {s}\n", .{@errorName(e)});
+        return -1;
+    };
+    return 0;
+}
-    _ = &allocator;
+fn testAndHandleIO(
+    out_ptr: [*]u8,
+    out_size: *u64,
+    in_ptr: [*]const u8,
+    in_size: u64,
+) !void {
+    const allocator = std.heap.c_allocator;
    // zig_protobuf leaks sometimes on invalid input, so we just work around it by using an arena
    var decode_arena = std.heap.ArenaAllocator.init(allocator);
    defer decode_arena.deinit();
-    var reader = std.io.Reader.fixed(in_ptr[0..in_size]);
-    var ctx = ELFLoaderCtx.decode(&reader, decode_arena.allocator()) catch return 0;
+    var ctx = try ELFLoaderCtx.decode(decode_arena.allocator(), in_ptr[0..in_size]);
    defer ctx.deinit(decode_arena.allocator());
-    var elf_effects = executeElfTest(ctx, allocator) catch return 0;
+    var elf_effects = try executeElfTest(ctx, allocator);
    defer elf_effects.deinit(allocator);
-    var writer: std.io.Writer.Allocating = .init(allocator);
-    defer writer.deinit();
-    try elf_effects.encode(&writer.writer, allocator);
-    const effect_bytes = writer.written();
-
-    const out_slice = out_ptr[0..out_size.*];
-    if (effect_bytes.len > out_slice.len) return 0;
-
-    @memcpy(out_slice[0..effect_bytes.len], effect_bytes);
+    const effect_bytes = elf_effects.encode();
+    if (out_size.* < effect_bytes.len) return error.OutputTooSmall;
+    @memcpy(out_ptr[0..effect_bytes.len], &effect_bytes);
    out_size.* = effect_bytes.len;
-    return 1;
}
fn executeElfTest(ctx: ELFLoaderCtx, allocator: std.mem.Allocator) !ElfLoaderEffects {
-    const ctx_elf = ctx.elf orelse return error.Unknown;
-    const elf_bytes = ctx_elf.data;
+    const elf_bytes = ctx.elf orelse return error.Unknown;
    var feature_set: sig.core.FeatureSet = .ALL_DISABLED;
    if (ctx.features) |features| for (features.features.items) |id| {
-        feature_set.setSlotId(id, 0) catch std.debug.panic("unknown id: 0x{x}", .{id});
+        feature_set.setSlotId(id, 0) catch std.debug.print("unknown feature id: 0x{x}\n", .{id});
    };
    const env: svm.Environment = .initV1(
diff --git a/conformance/src/elf_types.zig b/conformance/src/elf_types.zig
new file mode 100644
index 0000000000..fcc4525872
--- /dev/null
+++ b/conformance/src/elf_types.zig
@@ -0,0 +1,180 @@
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+const xxhash = std.hash.XxHash64.hash;
+const writeInt = std.mem.writeInt;
+
+/// Wrapper for the ELF binary and the features that the loader should use
+/// Note that we currently hardcode the features to be used by the loader,
+/// so features isn't actually used yet.
+pub const ELFLoaderCtx = struct {
+    elf: ?[]const u8 = null,
+    features: ?FeatureSet = null,
+    deploy_checks: bool = false,
+
+    pub fn deinit(self: *const ELFLoaderCtx, allocator: Allocator) void {
+        if (self.features) |features| features.deinit(allocator);
+    }
+
+    /// Decode a FlatBuffers-encoded ELFLoaderCtx into this struct.
+    /// The returned elf slice borrows directly from buf, so buf must
+    /// outlive the result.
+    pub fn decode(
+        allocator: Allocator,
+        buf: []const u8,
+    ) error{ OutOfMemory, InsufficientData }!ELFLoaderCtx {
+        const root_pos = try decodeInt(u32, buf[0..]);
+        const vt: VTable = try .decode(buf, root_pos);
+
+        return .{
+            .elf = if (vt.field(0)) |offset|
+                try decodeUbyteVec(buf, root_pos + offset)
+            else
+                null,
+            .features = if (vt.field(1)) |offset|
+                try .decode(allocator, buf, try deref(buf, root_pos + offset))
+            else
+                null,
+            .deploy_checks = if (vt.field(2)) |offset| buf[root_pos + offset] != 0 else false,
+        };
+    }
+};
+
+/// Captures the results of an ELF binary load.
+/// Structurally similar to fd_sbpf_program_t
+pub const ELFLoaderEffects = struct {
+    rodata: []const u8 = &.{},
+    rodata_sz: u64 = 0,
+    text_cnt: u64 = 0,
+    text_off: u64 = 0,
+    entry_pc: u64 = 0,
+    calldests: std.ArrayListUnmanaged(u64) = .empty,
+    @"error": i32 = 0,
+
+    pub fn deinit(self: *const ELFLoaderEffects, allocator: Allocator) void {
+        allocator.free(self.rodata);
+        var cd = self.calldests;
+        cd.deinit(allocator);
+    }
+
+    pub const buf_size = 72;
+
+    /// Encode an ELFLoaderEffects into a 72-byte FlatBuffer.
+    pub fn encode(self: *const ELFLoaderEffects) [buf_size]u8 {
+        const meta: EncodeMeta = .{
+            .rodata_hash = self.rodata.len != 0,
+            .calldests_hash = self.calldests.items.len != 0,
+        };
+        var buf: [buf_size]u8 = template(meta);
+
+        buf[28] = @truncate(@as(u32, @bitCast(self.@"error")));
+        if (meta.rodata_hash) {
+            writeInt(u64, buf[32..40], xxhash(0, self.rodata), .little);
+        }
+        writeInt(u64, buf[40..48], self.text_cnt, .little);
+        writeInt(u64, buf[48..56], self.text_off, .little);
+        writeInt(u64, buf[56..64], self.entry_pc, .little);
+        if (meta.calldests_hash) {
+            const calldests_bytes = std.mem.sliceAsBytes(self.calldests.items);
+            writeInt(u64, buf[64..72], xxhash(0, calldests_bytes), .little);
+        }
+
+        return buf;
+    }
+
+    const EncodeMeta = struct {
+        rodata_hash: bool,
+        calldests_hash: bool,
+    };
+
+    inline fn template(meta: EncodeMeta) [buf_size]u8 {
+        var buf: [buf_size]u8 = @splat(0);
+        // root offset: table at byte 24
+        writeInt(u32, buf[0..4], 24, .little);
+        // vtable (bytes 4..20): vt_size, table_size, 6 field offsets
+        writeInt(u16, buf[4..6], 16, .little); // vt_size (2+2+6*2 = 16)
+        writeInt(u16, buf[6..8], 48, .little); // table inline size
+        writeInt(u16, buf[8..10], 4, .little); // field 0: err_code
+        writeInt(u16, buf[10..12], if (meta.rodata_hash) 8 else 0, .little); // field 1: rodata_hash
+        writeInt(u16, buf[12..14], 16, .little); // field 2: text_cnt
+        writeInt(u16, buf[14..16], 24, .little); // field 3: text_off
+        writeInt(u16, buf[16..18], 32, .little); // field 4: entry_pc
+        writeInt(u16, buf[18..20], if (meta.calldests_hash) 40 else 0, .little); // field 5: calldests_hash
+        // bytes 20..24: padding (already zero)
+        // soffset: table_pos(24) - vtable_pos(4) = 20
+        writeInt(i32, buf[24..28], 20, .little);
+        return buf;
+    }
+};
+
+pub const FeatureSet = struct {
+    features: std.ArrayListUnmanaged(u64) = .empty,
+
+    pub fn deinit(self: *const FeatureSet, allocator: Allocator) void {
+        var features = self.features;
+        
features.deinit(allocator); + } + + pub fn decode( + allocator: Allocator, + buf: []const u8, + pos: usize, + ) error{ OutOfMemory, InsufficientData }!FeatureSet { + const vt: VTable = try .decode(buf, pos); + var result: FeatureSet = .{}; + if (vt.field(0)) |offset| { + const vec_pos = try deref(buf, pos + offset); + const len = try decodeInt(u32, buf[vec_pos..]); + try result.features.ensureTotalCapacity(allocator, len); + for (0..len) |i| { + result.features.appendAssumeCapacity( + try decodeInt(u64, buf[vec_pos + 4 + i * 8 ..]), + ); + } + } + return result; + } +}; + +const VTable = struct { + buf: []const u8, + /// start of vtable in buf + pos: usize, + /// vtable size in bytes + size: usize, + + fn decode(buf: []const u8, table_pos: usize) error{InsufficientData}!VTable { + const soff = try decodeInt(i32, buf[table_pos..]); + const vt_pos: usize = @intCast(@as(i64, @intCast(table_pos)) - soff); + const vt_size = try decodeInt(u16, buf[vt_pos..]); + return .{ .buf = buf, .pos = vt_pos, .size = vt_size }; + } + + /// Return the field offset (relative to table start) for field `index`, or null if absent. + fn field(self: VTable, index: usize) ?usize { + const entry = 4 + index * 2; // first field entry is at byte 4 in vtable + if (entry + 2 > self.size) return null; + const offset = std.mem.readInt(u16, self.buf[self.pos + entry ..][0..2], .little); + if (offset == 0) return null; + return offset; + } +}; + +/// Follow a uoffset_t: return the position it points to. +fn deref(buf: []const u8, pos: usize) error{InsufficientData}!usize { + if (pos + 4 > buf.len) return error.InsufficientData; + return pos + try decodeInt(u32, buf[pos..]); +} + +fn decodeUbyteVec(buf: []const u8, field_pos: usize) error{InsufficientData}![]const u8 { + const vec_pos = try deref(buf, field_pos); + const len = try decodeInt(u32, buf[vec_pos..]); + const start = vec_pos + 4; + if (start + len > buf.len) return error.InsufficientData; + return buf[start .. start + len]; +} + +fn decodeInt(Int: type, buf: []const u8) error{InsufficientData}!Int { + if (buf.len < @sizeOf(Int)) return error.InsufficientData; + return std.mem.readInt(Int, buf[0..@sizeOf(Int)], .little); +} diff --git a/conformance/src/proto/org/solana/sealevel/v1.pb.zig b/conformance/src/proto/org/solana/sealevel/v1.pb.zig index 28e8b70406..755b052ae6 100644 --- a/conformance/src/proto/org/solana/sealevel/v1.pb.zig +++ b/conformance/src/proto/org/solana/sealevel/v1.pb.zig @@ -390,25 +390,19 @@ pub const FeeRateGovernor = struct { } }; -/// EpochContext includes context scoped to an epoch. -/// On "real" ledgers, it is created during the epoch boundary. 
-pub const EpochContext = struct { - features: ?FeatureSet = null, - hashes_per_tick: u64 = 0, - ticks_per_slot: u64 = 0, - inflation: ?Inflation = null, - genesis_creation_time: u64 = 0, - vote_accounts_t_1: std.ArrayListUnmanaged(VoteAccount) = .empty, - vote_accounts_t_2: std.ArrayListUnmanaged(VoteAccount) = .empty, +pub const EpochSchedule = struct { + slots_per_epoch: u64 = 0, + leader_schedule_slot_offset: u64 = 0, + warmup: bool = false, + first_normal_epoch: u64 = 0, + first_normal_slot: u64 = 0, pub const _desc_table = .{ - .features = fd(1, .submessage), - .hashes_per_tick = fd(2, .{ .scalar = .uint64 }), - .ticks_per_slot = fd(3, .{ .scalar = .uint64 }), - .inflation = fd(5, .submessage), - .genesis_creation_time = fd(6, .{ .scalar = .uint64 }), - .vote_accounts_t_1 = fd(11, .{ .repeated = .submessage }), - .vote_accounts_t_2 = fd(12, .{ .repeated = .submessage }), + .slots_per_epoch = fd(1, .{ .scalar = .uint64 }), + .leader_schedule_slot_offset = fd(2, .{ .scalar = .uint64 }), + .warmup = fd(3, .{ .scalar = .bool }), + .first_normal_epoch = fd(4, .{ .scalar = .uint64 }), + .first_normal_slot = fd(5, .{ .scalar = .uint64 }), }; /// Encodes the message to the writer @@ -475,103 +469,15 @@ pub const EpochContext = struct { } }; -/// SlotContext includes context scoped to a block. -/// On "real" ledgers, it is created during the slot boundary. -pub const SlotContext = struct { - slot: u64 = 0, - block_height: u64 = 0, - poh: []const u8 = &.{}, - parent_bank_hash: []const u8 = &.{}, - parent_lthash: []const u8 = &.{}, - prev_slot: u64 = 0, - prev_lps: u64 = 0, - prev_epoch_capitalization: u64 = 0, - fee_rate_governor: ?FeeRateGovernor = null, - parent_signature_count: u64 = 0, - - pub const _desc_table = .{ - .slot = fd(1, .{ .scalar = .fixed64 }), - .block_height = fd(2, .{ .scalar = .fixed64 }), - .poh = fd(3, .{ .scalar = .bytes }), - .parent_bank_hash = fd(4, .{ .scalar = .bytes }), - .parent_lthash = fd(5, .{ .scalar = .bytes }), - .prev_slot = fd(6, .{ .scalar = .fixed64 }), - .prev_lps = fd(7, .{ .scalar = .uint64 }), - .prev_epoch_capitalization = fd(8, .{ .scalar = .uint64 }), - .fee_rate_governor = fd(9, .submessage), - .parent_signature_count = fd(10, .{ .scalar = .uint64 }), - }; - - /// Encodes the message to the writer - /// The allocator is used to generate submessages internally. - /// Hence, an ArenaAllocator is a preferred choice if allocations are a bottleneck. - pub fn encode( - self: @This(), - writer: *std.Io.Writer, - allocator: std.mem.Allocator, - ) (std.Io.Writer.Error || std.mem.Allocator.Error)!void { - return protobuf.encode(writer, allocator, self); - } - - /// Decodes the message from the bytes read from the reader. - pub fn decode( - reader: *std.Io.Reader, - allocator: std.mem.Allocator, - ) (protobuf.DecodingError || std.Io.Reader.Error || std.mem.Allocator.Error)!@This() { - return protobuf.decode(@This(), reader, allocator); - } - - /// Deinitializes and frees the memory associated with the message. - pub fn deinit(self: *@This(), allocator: std.mem.Allocator) void { - return protobuf.deinit(allocator, self); - } - - /// Duplicates the message. - pub fn dupe(self: @This(), allocator: std.mem.Allocator) std.mem.Allocator.Error!@This() { - return protobuf.dupe(@This(), self, allocator); - } - - /// Decodes the message from the JSON string. 
- pub fn jsonDecode( - input: []const u8, - options: std.json.ParseOptions, - allocator: std.mem.Allocator, - ) !std.json.Parsed(@This()) { - return protobuf.json.decode(@This(), input, options, allocator); - } - - /// Encodes the message to a JSON string. - pub fn jsonEncode( - self: @This(), - options: std.json.Stringify.Options, - allocator: std.mem.Allocator, - ) ![]const u8 { - return protobuf.json.encode(self, options, allocator); - } - - /// This method is used by std.json - /// internally for deserialization. DO NOT RENAME! - pub fn jsonParse( - allocator: std.mem.Allocator, - source: anytype, - options: std.json.ParseOptions, - ) !@This() { - return protobuf.json.parse(@This(), allocator, source, options); - } - - /// This method is used by std.json - /// internally for serialization. DO NOT RENAME! - pub fn jsonStringify(self: *const @This(), jws: anytype) !void { - return protobuf.json.stringify(@This(), self, jws); - } -}; - -/// FixtureMetadata includes the metadata for the fixture -pub const FixtureMetadata = struct { - fn_entrypoint: []const u8 = &.{}, +pub const Rent = struct { + lamports_per_byte_year: u64 = 0, + exemption_threshold: f64 = 0, + burn_percent: u32 = 0, pub const _desc_table = .{ - .fn_entrypoint = fd(1, .{ .scalar = .string }), + .lamports_per_byte_year = fd(1, .{ .scalar = .uint64 }), + .exemption_threshold = fd(2, .{ .scalar = .double }), + .burn_percent = fd(3, .{ .scalar = .uint32 }), }; /// Encodes the message to the writer @@ -638,11 +544,25 @@ pub const FixtureMetadata = struct { } }; -pub const ELFBinary = struct { - data: []const u8 = &.{}, +/// EpochContext includes context scoped to an epoch. +/// On "real" ledgers, it is created during the epoch boundary. +pub const EpochContext = struct { + features: ?FeatureSet = null, + hashes_per_tick: u64 = 0, + ticks_per_slot: u64 = 0, + inflation: ?Inflation = null, + genesis_creation_time: u64 = 0, + vote_accounts_t_1: std.ArrayListUnmanaged(VoteAccount) = .empty, + vote_accounts_t_2: std.ArrayListUnmanaged(VoteAccount) = .empty, pub const _desc_table = .{ - .data = fd(1, .{ .scalar = .bytes }), + .features = fd(1, .submessage), + .hashes_per_tick = fd(2, .{ .scalar = .uint64 }), + .ticks_per_slot = fd(3, .{ .scalar = .uint64 }), + .inflation = fd(5, .submessage), + .genesis_creation_time = fd(6, .{ .scalar = .uint64 }), + .vote_accounts_t_1 = fd(11, .{ .repeated = .submessage }), + .vote_accounts_t_2 = fd(12, .{ .repeated = .submessage }), }; /// Encodes the message to the writer @@ -709,18 +629,31 @@ pub const ELFBinary = struct { } }; -/// Wrapper for the ELF binary and the features that the loader should use -/// Note that we currently hardcode the features to be used by the loader, -/// so features isn't actually used yet. -pub const ELFLoaderCtx = struct { - elf: ?ELFBinary = null, - features: ?FeatureSet = null, - deploy_checks: bool = false, +/// SlotContext includes context scoped to a block. +/// On "real" ledgers, it is created during the slot boundary. 
+pub const SlotContext = struct { + slot: u64 = 0, + block_height: u64 = 0, + poh: []const u8 = &.{}, + parent_bank_hash: []const u8 = &.{}, + parent_lthash: []const u8 = &.{}, + prev_slot: u64 = 0, + prev_lps: u64 = 0, + prev_epoch_capitalization: u64 = 0, + fee_rate_governor: ?FeeRateGovernor = null, + parent_signature_count: u64 = 0, pub const _desc_table = .{ - .elf = fd(1, .submessage), - .features = fd(2, .submessage), - .deploy_checks = fd(4, .{ .scalar = .bool }), + .slot = fd(1, .{ .scalar = .fixed64 }), + .block_height = fd(2, .{ .scalar = .fixed64 }), + .poh = fd(3, .{ .scalar = .bytes }), + .parent_bank_hash = fd(4, .{ .scalar = .bytes }), + .parent_lthash = fd(5, .{ .scalar = .bytes }), + .prev_slot = fd(6, .{ .scalar = .fixed64 }), + .prev_lps = fd(7, .{ .scalar = .uint64 }), + .prev_epoch_capitalization = fd(8, .{ .scalar = .uint64 }), + .fee_rate_governor = fd(9, .submessage), + .parent_signature_count = fd(10, .{ .scalar = .uint64 }), }; /// Encodes the message to the writer @@ -787,25 +720,14 @@ pub const ELFLoaderCtx = struct { } }; -/// Captures the results of a elf binary load. -/// Structurally similar to fd_sbpf_program_t -pub const ELFLoaderEffects = struct { - rodata: []const u8 = &.{}, - rodata_sz: u64 = 0, - text_cnt: u64 = 0, - text_off: u64 = 0, - entry_pc: u64 = 0, - calldests: std.ArrayListUnmanaged(u64) = .empty, - @"error": i32 = 0, +/// A single entry in the blockhash queue. +pub const BlockhashQueueEntry = struct { + blockhash: []const u8 = &.{}, + lamports_per_signature: u64 = 0, pub const _desc_table = .{ - .rodata = fd(1, .{ .scalar = .bytes }), - .rodata_sz = fd(2, .{ .scalar = .uint64 }), - .text_cnt = fd(4, .{ .scalar = .uint64 }), - .text_off = fd(5, .{ .scalar = .uint64 }), - .entry_pc = fd(6, .{ .scalar = .uint64 }), - .calldests = fd(7, .{ .packed_repeated = .{ .scalar = .uint64 } }), - .@"error" = fd(8, .{ .scalar = .int32 }), + .blockhash = fd(1, .{ .scalar = .bytes }), + .lamports_per_signature = fd(2, .{ .scalar = .uint64 }), }; /// Encodes the message to the writer @@ -872,15 +794,12 @@ pub const ELFLoaderEffects = struct { } }; -pub const ELFLoaderFixture = struct { - metadata: ?FixtureMetadata = null, - input: ?ELFLoaderCtx = null, - output: ?ELFLoaderEffects = null, +/// FixtureMetadata includes the metadata for the fixture +pub const FixtureMetadata = struct { + fn_entrypoint: []const u8 = &.{}, pub const _desc_table = .{ - .metadata = fd(1, .submessage), - .input = fd(2, .submessage), - .output = fd(3, .submessage), + .fn_entrypoint = fd(1, .{ .scalar = .string }), }; /// Encodes the message to the writer @@ -1031,8 +950,7 @@ pub const InstrContext = struct { instr_accounts: std.ArrayListUnmanaged(InstrAcct) = .empty, data: []const u8 = &.{}, cu_avail: u64 = 0, - slot_context: ?SlotContext = null, - epoch_context: ?EpochContext = null, + features: ?FeatureSet = null, pub const _desc_table = .{ .program_id = fd(1, .{ .scalar = .bytes }), @@ -1040,8 +958,7 @@ pub const InstrContext = struct { .instr_accounts = fd(4, .{ .repeated = .submessage }), .data = fd(5, .{ .scalar = .bytes }), .cu_avail = fd(6, .{ .scalar = .uint64 }), - .slot_context = fd(8, .submessage), - .epoch_context = fd(9, .submessage), + .features = fd(10, .submessage), }; /// Encodes the message to the writer @@ -2875,98 +2792,24 @@ pub const SanitizedTransaction = struct { } }; -/// This Transaction context be used to fuzz either `load_execute_and_commit_transactions`, -/// `load_and_execute_transactions` in `bank.rs` or `load_and_execute_sanitized_transactions` -/// in 
`svm/transaction_processor.rs` -pub const TxnContext = struct { - tx: ?SanitizedTransaction = null, - account_shared_data: std.ArrayListUnmanaged(AcctState) = .empty, - blockhash_queue: std.ArrayListUnmanaged([]const u8) = .empty, - epoch_ctx: ?EpochContext = null, - slot_ctx: ?SlotContext = null, - - pub const _desc_table = .{ - .tx = fd(1, .submessage), - .account_shared_data = fd(2, .{ .repeated = .submessage }), - .blockhash_queue = fd(3, .{ .repeated = .{ .scalar = .bytes } }), - .epoch_ctx = fd(4, .submessage), - .slot_ctx = fd(5, .submessage), - }; - - /// Encodes the message to the writer - /// The allocator is used to generate submessages internally. - /// Hence, an ArenaAllocator is a preferred choice if allocations are a bottleneck. - pub fn encode( - self: @This(), - writer: *std.Io.Writer, - allocator: std.mem.Allocator, - ) (std.Io.Writer.Error || std.mem.Allocator.Error)!void { - return protobuf.encode(writer, allocator, self); - } - - /// Decodes the message from the bytes read from the reader. - pub fn decode( - reader: *std.Io.Reader, - allocator: std.mem.Allocator, - ) (protobuf.DecodingError || std.Io.Reader.Error || std.mem.Allocator.Error)!@This() { - return protobuf.decode(@This(), reader, allocator); - } - - /// Deinitializes and frees the memory associated with the message. - pub fn deinit(self: *@This(), allocator: std.mem.Allocator) void { - return protobuf.deinit(allocator, self); - } - - /// Duplicates the message. - pub fn dupe(self: @This(), allocator: std.mem.Allocator) std.mem.Allocator.Error!@This() { - return protobuf.dupe(@This(), self, allocator); - } - - /// Decodes the message from the JSON string. - pub fn jsonDecode( - input: []const u8, - options: std.json.ParseOptions, - allocator: std.mem.Allocator, - ) !std.json.Parsed(@This()) { - return protobuf.json.decode(@This(), input, options, allocator); - } - - /// Encodes the message to a JSON string. - pub fn jsonEncode( - self: @This(), - options: std.json.Stringify.Options, - allocator: std.mem.Allocator, - ) ![]const u8 { - return protobuf.json.encode(self, options, allocator); - } - - /// This method is used by std.json - /// internally for deserialization. DO NOT RENAME! - pub fn jsonParse( - allocator: std.mem.Allocator, - source: anytype, - options: std.json.ParseOptions, - ) !@This() { - return protobuf.json.parse(@This(), allocator, source, options); - } - - /// This method is used by std.json - /// internally for serialization. DO NOT RENAME! 
-    pub fn jsonStringify(self: *const @This(), jws: anytype) !void {
-        return protobuf.json.stringify(@This(), self, jws);
-    }
-};
-
-/// The resulting state of an account after a transaction
-pub const ResultingState = struct {
-    acct_states: std.ArrayListUnmanaged(AcctState) = .empty,
-    rent_debits: std.ArrayListUnmanaged(RentDebits) = .empty,
-    transaction_rent: u64 = 0,
+/// Bank fields relevant to transaction execution
+pub const TxnBank = struct {
+    blockhash_queue: std.ArrayListUnmanaged(BlockhashQueueEntry) = .empty,
+    rbh_lamports_per_signature: u32 = 0,
+    fee_rate_governor: ?FeeRateGovernor = null,
+    total_epoch_stake: u64 = 0,
+    epoch_schedule: ?EpochSchedule = null,
+    rent: ?Rent = null,
+    features: ?FeatureSet = null,
    pub const _desc_table = .{
-        .acct_states = fd(1, .{ .repeated = .submessage }),
-        .rent_debits = fd(2, .{ .repeated = .submessage }),
-        .transaction_rent = fd(3, .{ .scalar = .uint64 }),
+        .blockhash_queue = fd(1, .{ .repeated = .submessage }),
+        .rbh_lamports_per_signature = fd(2, .{ .scalar = .uint32 }),
+        .fee_rate_governor = fd(3, .submessage),
+        .total_epoch_stake = fd(4, .{ .scalar = .uint64 }),
+        .epoch_schedule = fd(5, .submessage),
+        .rent = fd(6, .submessage),
+        .features = fd(7, .submessage),
    };
    /// Encodes the message to the writer
@@ -3033,14 +2876,18 @@
    }
};
-/// The rent state for an account after a transaction
-pub const RentDebits = struct {
-    pubkey: []const u8 = &.{},
-    rent_collected: i64 = 0,
+/// This Transaction context can be used to fuzz either `load_execute_and_commit_transactions`,
+/// `load_and_execute_transactions` in `bank.rs` or `load_and_execute_sanitized_transactions`
+/// in `svm/transaction_processor.rs`
+pub const TxnContext = struct {
+    tx: ?SanitizedTransaction = null,
+    account_shared_data: std.ArrayListUnmanaged(AcctState) = .empty,
+    bank: ?TxnBank = null,
    pub const _desc_table = .{
-        .pubkey = fd(1, .{ .scalar = .bytes }),
-        .rent_collected = fd(2, .{ .scalar = .int64 }),
+        .tx = fd(1, .submessage),
+        .account_shared_data = fd(2, .{ .repeated = .submessage }),
+        .bank = fd(6, .submessage),
    };
    /// Encodes the message to the writer
@@ -3184,8 +3031,6 @@ pub const FeeDetails = struct {
pub const TxnResult = struct {
    executed: bool = false,
    sanitization_error: bool = false,
-    resulting_state: ?ResultingState = null,
-    rent: u64 = 0,
    is_ok: bool = false,
    status: u32 = 0,
    instruction_error: u32 = 0,
@@ -3195,12 +3040,12 @@
    executed_units: u64 = 0,
    fee_details: ?FeeDetails = null,
    loaded_accounts_data_size: u64 = 0,
+    modified_accounts: std.ArrayListUnmanaged(AcctState) = .empty,
+    rollback_accounts: std.ArrayListUnmanaged(AcctState) = .empty,
    pub const _desc_table = .{
        .executed = fd(1, .{ .scalar = .bool }),
        .sanitization_error = fd(2, .{ .scalar = .bool }),
-        .resulting_state = fd(3, .submessage),
-        .rent = fd(4, .{ .scalar = .uint64 }),
        .is_ok = fd(5, .{ .scalar = .bool }),
        .status = fd(6, .{ .scalar = .uint32 }),
        .instruction_error = fd(7, .{ .scalar = .uint32 }),
@@ -3210,6 +3055,8 @@
        .executed_units = fd(11, .{ .scalar = .uint64 }),
        .fee_details = fd(12, .submessage),
        .loaded_accounts_data_size = fd(13, .{ .scalar = .uint64 }),
+        .modified_accounts = fd(14, .{ .repeated = .submessage }),
+        .rollback_accounts = fd(15, .{ .repeated = .submessage }),
    };
    /// Encodes the message to the writer
diff --git a/conformance/src/txn_execute.zig b/conformance/src/txn_execute.zig
index 4caa09ed6b..0b2c967d62 100644
--- 
a/conformance/src/txn_execute.zig +++ b/conformance/src/txn_execute.zig @@ -73,7 +73,6 @@ const Atomic = std.atomic.Value; const features = sig.core.features; const freeze = sig.replay.freeze; const program = sig.runtime.program; -const sysvars = sig.runtime.sysvar; const vm = sig.vm; const transaction_execution = sig.runtime.transaction_execution; const update_sysvar = sig.replay.update_sysvar; @@ -104,6 +103,7 @@ const TransactionInstruction = sig.core.transaction.Instruction; const TransactionAddressLookup = sig.core.transaction.AddressLookup; const AccountSharedData = sig.runtime.AccountSharedData; +const Clock = sig.runtime.sysvar.Clock; const ComputeBudget = sig.runtime.ComputeBudget; const EpochRewards = sig.runtime.sysvar.EpochRewards; const EpochSchedule = sig.runtime.sysvar.EpochSchedule; @@ -140,6 +140,9 @@ fn executeTxnContext( var accounts_map = try loadAccountsMap(allocator, &pb_txn_ctx); defer deinitMapAndValues(allocator, accounts_map); + const clock = getSysvarFromAccounts(allocator, Clock, &accounts_map); + const fixture_slot: Slot = if (clock) |c| c.slot else 10; + // TODO: use?? // const fee_collector = Pubkey.parseRuntime("1111111111111111111111111111111111") catch unreachable; @@ -217,6 +220,7 @@ fn executeTxnContext( // https://github.com/firedancer-io/agave/blob/10fe1eb29aac9c236fd72d08ae60a3ef61ee8353/runtime/src/bank.rs#L1162 { try ancestors.addSlot(allocator, 0); + try ancestors.addSlot(allocator, fixture_slot); // bank.compute_budget = runtime_config.compute_budget; // bank.transaction_account_lock_limit = null; // bank.transaction_debug_keys = null; @@ -371,44 +375,6 @@ fn executeTxnContext( try epoch_stakes_map.put(allocator, e, .EMPTY); } - const update_sysvar_deps = update_sysvar.UpdateSysvarAccountDeps{ - .slot = slot, - .slot_store = account_store.forSlot(slot, &ancestors), - .capitalization = &capitalization, - .rent = &genesis_config.rent, - }; - - try update_sysvar.updateStakeHistory( - allocator, - .{ - .epoch = epoch, - .parent_slots_epoch = null, // no parent yet - .stakes_cache = &stakes_cache, - .update_sysvar_deps = update_sysvar_deps, - }, - ); - _ = try update_sysvar.updateClock(allocator, .{ - .feature_set = &feature_set, - .epoch_schedule = &epoch_schedule, - .epoch_stakes = epoch_stakes_map.getPtr(epoch), - .stakes_cache = &stakes_cache, - .epoch = epoch, - .parent_slots_epoch = null, // no parent yet - .genesis_creation_time = genesis_config.creation_time, - .ns_per_slot = @intCast(genesis_config.nsPerSlot()), - .update_sysvar_deps = update_sysvar_deps, - }); - try update_sysvar.updateRent(allocator, genesis_config.rent, update_sysvar_deps); - try update_sysvar.updateEpochSchedule(allocator, epoch_schedule, update_sysvar_deps); - try update_sysvar.updateRecentBlockhashes(allocator, &blockhash_queue, update_sysvar_deps); - try update_sysvar.updateLastRestartSlot( - allocator, - &feature_set, - slot, - &hard_forks, - update_sysvar_deps, - ); - // NOTE: Agave fills the sysvar cache here, we should not need for txn fuzzing as the sysvar cache is only used in the SVM, so we can // populate immediately before executing transactions. (I think....) } @@ -440,215 +406,170 @@ fn executeTxnContext( account_store.reader(), ); - parent_slot = slot; + slot = fixture_slot; + parent_slot = slot -| 1; parent_hash = slot_hash; const parent_slots_epoch = epoch; - slot = loadSlot(&pb_txn_ctx); - if (slot > 0) { - // Bank::new_from_parent(...) 
- { - // Clone epoch schedule - // epoch_schedule = epoch_schedule; - - // Get epoch - epoch = epoch_schedule.getEpoch(slot); - - // Clone accounts db - // let (rc, bank_rc_creation_time_us) = measure_us!({ - // let accounts_db = Arc::clone(&parent.rc.accounts.accounts_db); - // BankRc { - // accounts: Arc::new(Accounts::new(accounts_db)), - // parent: RwLock::new(Some(Arc::clone(&parent))), - // bank_id_generator: Arc::clone(&parent.rc.bank_id_generator), - // } - // }); - - // Clone status_cache - // const status_cache = parent.status_cache.clone(); - - // Derive new fee rate governor - fee_rate_governor = FeeRateGovernor.initDerived( - &fee_rate_governor, - 0, // parent.signature_count() - ); - - // Get bank id - // let bank_id = rc.bank_id_generator.fetch_add(1, Relaxed) + 1; - - // Clone blockhash queue - // blockhash_queue = blockhash_queue; - - // Clone stakes cache - // const stakes_cache = parent.stakes_cache.clone(); + // Bank::new_from_parent(...) + { + // Clone epoch schedule + // epoch_schedule = epoch_schedule; + + // Get epoch + epoch = epoch_schedule.getEpoch(slot); + + // Clone accounts db + // let (rc, bank_rc_creation_time_us) = measure_us!({ + // let accounts_db = Arc::clone(&parent.rc.accounts.accounts_db); + // BankRc { + // accounts: Arc::new(Accounts::new(accounts_db)), + // parent: RwLock::new(Some(Arc::clone(&parent))), + // bank_id_generator: Arc::clone(&parent.rc.bank_id_generator), + // } + // }); + + // Clone status_cache + // const status_cache = parent.status_cache.clone(); + + // Derive new fee rate governor + fee_rate_governor = FeeRateGovernor.initDerived( + &fee_rate_governor, + 0, // parent.signature_count() + ); - // Clone epoch stakes - // epoch_stakes = epoch_stakes; + // Get bank id + // let bank_id = rc.bank_id_generator.fetch_add(1, Relaxed) + 1; - // Create new transaction processor - // const transaction_processor = TransactionBatchProcessor::new_from(&parent.transaction_processor, slot, epoch); + // Clone blockhash queue + // blockhash_queue = blockhash_queue; - // Clone rewards pool pubkeys - // const rewards_pools = parent.rewards_pools.clone(); + // Clone stakes cache + // const stakes_cache = parent.stakes_cache.clone(); - // Clone transaction debug keys - // const transaction_debug_keys = parent.transaction_debug_keys.clone(); + // Clone epoch stakes + // epoch_stakes = epoch_stakes; - // Clone transaction log collector config - // const transaction_log_collector_config = parent.transaction_log_collector_config.clone(); + // Create new transaction processor + // const transaction_processor = TransactionBatchProcessor::new_from(&parent.transaction_processor, slot, epoch); - // Clone feature set - // feature_set = feature_set; + // Clone rewards pool pubkeys + // const rewards_pools = parent.rewards_pools.clone(); - // Get initial accounts data size - // const initial_accounts_data_size = parent.load_accounts_data_size(); + // Clone transaction debug keys + // const transaction_debug_keys = parent.transaction_debug_keys.clone(); - // Init new bank -- lots of copying of fields here - // var new = Bank{...} + // Clone transaction log collector config + // const transaction_log_collector_config = parent.transaction_log_collector_config.clone(); - // Create ancestors with new slot and all parent slots - try ancestors.addSlot(allocator, slot); + // Clone feature set + // feature_set = feature_set; - // Update epoch - if (parent_slots_epoch < epoch) { - // Bank::process_new_epoch(...) 
+ // Get initial accounts data size + // const initial_accounts_data_size = parent.load_accounts_data_size(); - try bank_methods.applyFeatureActivations( - allocator, - slot, - &feature_set, - account_store, - true, - ); + // Init new bank -- lots of copying of fields here + // var new = Bank{...} - // stakes_cache.activateEpoch(); - // Since the stakes cache is empty, we don't need to actually do anything here except add - // an entry for the parent epoch with zero stakes. - // https://github.com/firedancer-io/agave/blob/10fe1eb29aac9c236fd72d08ae60a3ef61ee8353/runtime/src/stakes.rs#L297 - { - const stakes, var stakes_guard = stakes_cache.stakes.writeWithLock(); - defer stakes_guard.unlock(); - stakes.epoch = epoch; - std.debug.assert(stakes.stake_history.entries.len == 0); - stakes.stake_history.entries.appendAssumeCapacity(.{ - .epoch = parent_slots_epoch, - .stake = .{ - .effective = 0, - .activating = 0, - .deactivating = 0, - }, - }); - } + // Create ancestors with new slot and all parent slots + try ancestors.addSlot(allocator, parent_slot); + try ancestors.addSlot(allocator, slot); - const leader_schedule_epoch = epoch_schedule.getLeaderScheduleEpoch(slot); - // Since stakes cache is empty, we just need to insert an empty stakes entry - // into the epoch stakes map at the leader schedule epoch stakes map if it is not present - // updateEpochStakes(leader_schedule_epoch); - if (!epoch_stakes_map.contains(leader_schedule_epoch)) - try epoch_stakes_map.put( - allocator, - leader_schedule_epoch, - .EMPTY, - ); - - // Bank::begin_partitioned_epoch_rewards(...) - // Similar to the above, epoch rewards is set but nothing meaningful is computed - // since there are no staked nodes or rewards to distribute. - // See: EpochRewards Debug Log: 0a73c09ab08f77e00b0faa8cf0d70408113b0a92_265678.fix - const epoch_rewards = EpochRewards{ - .distribution_starting_block_height = 2, - .num_partitions = 1, - .parent_blockhash = blockhash_queue.last_hash.?, - .total_points = 0, - .total_rewards = 0, - .distributed_rewards = 0, - .active = true, - }; - try update_sysvar.updateSysvarAccount(EpochRewards, allocator, epoch_rewards, .{ - .slot = slot, - .slot_store = account_store.forSlot(slot, &ancestors), - .capitalization = &capitalization, - .rent = &genesis_config.rent, - }); - } else { - const leader_schedule_epoch = epoch_schedule.getLeaderScheduleEpoch(slot); - // Since stakes cache is empty, we just need to insert an empty stakes entry - // into the epoch stakes map at the leader schedule epoch stakes map if it is not present - // updateEpochStakes(leader_schedule_epoch); - if (!epoch_stakes_map.contains(leader_schedule_epoch)) { - try epoch_stakes_map.put(allocator, leader_schedule_epoch, .EMPTY); - } - } + // Update epoch + if (parent_slots_epoch < epoch) { + // Bank::process_new_epoch(...) - // Bank::distribute_partitioned_epoch_rewards(...) - // Effectively noop for txn fuzzing purposes since height < distribution_starting_block_height - // See: EpochRewards Debug Log: 0a73c09ab08f77e00b0faa8cf0d70408113b0a92_265678.fix - // try bank_methods.distributePartitionedEpochRewards(); - - // Prepare program cache for upcoming feature set + try bank_methods.applyFeatureActivations( + allocator, + slot, + &feature_set, + account_store, + true, + ); - // Update sysvars + // stakes_cache.activateEpoch(); + // Since the stakes cache is empty, we don't need to actually do anything here except add + // an entry for the parent epoch with zero stakes. 
+ // https://github.com/firedancer-io/agave/blob/10fe1eb29aac9c236fd72d08ae60a3ef61ee8353/runtime/src/stakes.rs#L297 { - const update_sysvar_deps: update_sysvar.UpdateSysvarAccountDeps = .{ - .slot = slot, - .slot_store = account_store.forSlot(slot, &ancestors), - .capitalization = &capitalization, - .rent = &genesis_config.rent, - }; - - try update_sysvar.updateSlotHashes( - allocator, - parent_slot, - parent_hash, - update_sysvar_deps, - ); - try update_sysvar.updateStakeHistory( - allocator, - .{ - .epoch = epoch, - .parent_slots_epoch = parent_slots_epoch, - .stakes_cache = &stakes_cache, - .update_sysvar_deps = update_sysvar_deps, + const stakes, var stakes_guard = stakes_cache.stakes.writeWithLock(); + defer stakes_guard.unlock(); + stakes.epoch = epoch; + std.debug.assert(stakes.stake_history.entries.len == 0); + stakes.stake_history.entries.appendAssumeCapacity(.{ + .epoch = parent_slots_epoch, + .stake = .{ + .effective = 0, + .activating = 0, + .deactivating = 0, }, - ); - _ = try update_sysvar.updateClock(allocator, .{ - .feature_set = &feature_set, - .epoch_schedule = &epoch_schedule, - .epoch_stakes = epoch_stakes_map.getPtr(epoch), - .stakes_cache = &stakes_cache, - .epoch = epoch, - .parent_slots_epoch = parent_slots_epoch, - .genesis_creation_time = genesis_config.creation_time, - .ns_per_slot = @intCast(genesis_config.nsPerSlot()), - .update_sysvar_deps = update_sysvar_deps, }); - try update_sysvar.updateLastRestartSlot( + } + + const leader_schedule_epoch = epoch_schedule.getLeaderScheduleEpoch(slot); + // Since stakes cache is empty, we just need to insert an empty stakes entry + // into the epoch stakes map at the leader schedule epoch stakes map if it is not present + // updateEpochStakes(leader_schedule_epoch); + if (!epoch_stakes_map.contains(leader_schedule_epoch)) + try epoch_stakes_map.put( allocator, - &feature_set, - slot, - &hard_forks, - update_sysvar_deps, + leader_schedule_epoch, + .EMPTY, ); + + // Bank::begin_partitioned_epoch_rewards(...) + // Similar to the above, epoch rewards is set but nothing meaningful is computed + // since there are no staked nodes or rewards to distribute. + // See: EpochRewards Debug Log: 0a73c09ab08f77e00b0faa8cf0d70408113b0a92_265678.fix + const epoch_rewards = EpochRewards{ + .distribution_starting_block_height = 2, + .num_partitions = 1, + .parent_blockhash = blockhash_queue.last_hash.?, + .total_points = 0, + .total_rewards = 0, + .distributed_rewards = 0, + .active = true, + }; + try update_sysvar.updateSysvarAccount(EpochRewards, allocator, epoch_rewards, .{ + .slot = slot, + .slot_store = account_store.forSlot(slot, &ancestors), + .capitalization = &capitalization, + .rent = &genesis_config.rent, + }); + } else { + const leader_schedule_epoch = epoch_schedule.getLeaderScheduleEpoch(slot); + // Since stakes cache is empty, we just need to insert an empty stakes entry + // into the epoch stakes map at the leader schedule epoch stakes map if it is not present + // updateEpochStakes(leader_schedule_epoch); + if (!epoch_stakes_map.contains(leader_schedule_epoch)) { + try epoch_stakes_map.put(allocator, leader_schedule_epoch, .EMPTY); } + } - // Get num accounts modified by this slot if accounts lt hash enabled + // Bank::distribute_partitioned_epoch_rewards(...) 
+ // Effectively noop for txn fuzzing purposes since height < distribution_starting_block_height + // See: EpochRewards Debug Log: 0a73c09ab08f77e00b0faa8cf0d70408113b0a92_265678.fix + // try bank_methods.distributePartitionedEpochRewards(); - // A bunch of stats stuff... - } + // Prepare program cache for upcoming feature set - // bank = bank_forks.write().unwrap().insert(bank).clone_without_scheduler(); - { - // if (root < highest_slot_at_startup) { - // bank.check_program_modification_slot = true; - // } + // Get num accounts modified by this slot if accounts lt hash enabled - // bunch of scheduler and forks stuff... - } + // A bunch of stats stuff... + } - // ProgramCache::prune(slot, epoch) - {} + // bank = bank_forks.write().unwrap().insert(bank).clone_without_scheduler(); + { + // if (root < highest_slot_at_startup) { + // bank.check_program_modification_slot = true; + // } + + // bunch of scheduler and forks stuff... } + // ProgramCache::prune(slot, epoch) + {} + // Checkpoint 2 -- End of Bank Transition to TxnContext Slot // try writeState(allocator, .{ // .slot = slot, @@ -663,50 +584,27 @@ fn executeTxnContext( // }); // Remove address lookup table, stake, and config program accounts by inserting empty accounts (zero-lamports) + // TODO: investigate: agave uses parent_slot here, but that causes more test failures in our code. try account_store.put(slot, program.address_lookup_table.ID, .EMPTY); try account_store.put(slot, program.config.ID, .EMPTY); try account_store.put(slot, program.stake.ID, .EMPTY); - // Load accounts into accounts db - for (accounts_map.keys(), accounts_map.values()) |pubkey, account| { - try account_store.put(slot, pubkey, .{ - .lamports = account.lamports, - .data = account.data, - .owner = account.owner, - .executable = account.executable, - .rent_epoch = account.rent_epoch, - }); - } - - // Update epoch schedule and rent to minimum rent exempt balance - { - const update_sysvar_deps = update_sysvar.UpdateSysvarAccountDeps{ - .slot = slot, - .slot_store = account_store.forSlot(slot, &ancestors), - .capitalization = &capitalization, - .rent = &genesis_config.rent, - }; - - try update_sysvar.updateRent(allocator, genesis_config.rent, update_sysvar_deps); - try update_sysvar.updateEpochSchedule(allocator, epoch_schedule, update_sysvar_deps); - } - - // Get lamports per signature from first entry in recent blockhashes + // Get lamports per signature from first entry in recent blockhashes (read from fixture data directly) const lamports_per_signature = blk: { - const account = try account_store.reader().forSlot(&ancestors).get( - allocator, - RecentBlockhashes.ID, - ) orelse break :blk null; - defer account.deinit(allocator); - - var data = account.data.iterator(); - const reader = data.reader(); - - const len = try sig.bincode.readInt(u64, reader, .{}); + const rbh_account = accounts_map.get(RecentBlockhashes.ID) orelse break :blk null; + const rbh_data = rbh_account.data; + if (rbh_data.len < 8) break :blk null; + const len = std.mem.readInt(u64, rbh_data[0..8], .little); if (len == 0) break :blk null; - const first_entry = try sig.bincode.read(allocator, RecentBlockhashes.Entry, reader, .{}); + var fbs = std.io.fixedBufferStream(rbh_data[8..]); + const first_entry = sig.bincode.read( + allocator, + RecentBlockhashes.Entry, + fbs.reader(), + .{}, + ) catch break :blk null; break :blk if (first_entry.lamports_per_signature != 0) first_entry.lamports_per_signature @@ -718,13 +616,16 @@ fn executeTxnContext( for (blockhashes) |blockhash| { try 
blockhash_queue.insertHash(allocator, blockhash, lamports_per_signature); } - const update_sysvar_deps = update_sysvar.UpdateSysvarAccountDeps{ - .slot = slot, - .slot_store = account_store.forSlot(slot, &ancestors), - .capitalization = &capitalization, - .rent = &genesis_config.rent, - }; - try update_sysvar.updateRecentBlockhashes(allocator, &blockhash_queue, update_sysvar_deps); + + for (accounts_map.keys(), accounts_map.values()) |pubkey, account| { + try account_store.put(slot, pubkey, .{ + .lamports = account.lamports, + .data = account.data, + .owner = account.owner, + .executable = account.executable, + .rent_epoch = account.rent_epoch, + }); + } // Checkpoint 3 // NOTE: For basic fixtures, we produce equivalent state up until this point, excluding the @@ -786,6 +687,7 @@ fn executeTxnContext( error.InvalidAddressLookupTableOwner => .InvalidAddressLookupTableOwner, error.InvalidAddressLookupTableData => .InvalidAddressLookupTableData, error.InvalidAddressLookupTableIndex => .InvalidAddressLookupTableIndex, + error.InvalidAccountIndex => .InvalidAccountIndex, else => std.debug.panic("Unexpected error: {s}\n", .{@errorName(err)}), }); defer resolved_transaction.deinit(allocator); @@ -911,16 +813,25 @@ fn serializeOutput( const errors = utils.convertTransactionError(txn.err); - var acct_states: std.ArrayList(pb.AcctState) = .{}; - errdefer acct_states.deinit(allocator); - if (result.ok.outputs != null and result.ok.err != null) { + var modified_accounts: std.ArrayListUnmanaged(pb.AcctState) = .empty; + var rollback_accounts: std.ArrayListUnmanaged(pb.AcctState) = .empty; + errdefer modified_accounts.deinit(allocator); + errdefer rollback_accounts.deinit(allocator); + + if (result.ok.err != null) { + for (txn.writes.constSlice()) |account| try rollback_accounts.append( + allocator, + try sharedAccountToState(allocator, account.pubkey, account.account), + ); + // In the event that the transaction is executed and fails, agave // returns *all* the loaded accounts, including all the modifications // from the failed transaction, whereas we only return the rollback // accounts. Our approach makes more sense in the context of the // validator, but for compatibility with solfuzz_agave's outputs, we - // need to return the modified loaded accounts. - for (failed_accounts) |account| { + // need to return the "modified" loaded accounts that aren't actually + // modified since the transaction failed. 
+ if (result.ok.outputs != null) for (failed_accounts) |account| { const was_an_input_and_is_writable = for ( sanitized.accounts.items(.pubkey), sanitized.accounts.items(.is_writable), @@ -928,24 +839,18 @@ fn serializeOutput( if (account.pubkey.equals(&pubkey)) break is_writable; } else false; - if (was_an_input_and_is_writable) try acct_states.append( + if (was_an_input_and_is_writable) try modified_accounts.append( allocator, try sharedAccountToState(allocator, account.pubkey, account.account), ); - } + }; } else for (txn.writes.constSlice()) |account| { - try acct_states.append( + try modified_accounts.append( allocator, try sharedAccountToState(allocator, account.pubkey, account.account), ); } - const resulting_state: pb.ResultingState = .{ - .rent_debits = .{}, - .transaction_rent = txn.rent, - .acct_states = acct_states, - }; - const return_data: []const u8 = if (txn.outputs) |out| if (out.return_data) |ret| try allocator.dupe(u8, ret.data.constSlice()) else &.{} else @@ -955,7 +860,6 @@ fn serializeOutput( .executed = true, .sanitization_error = false, .is_ok = txn.err == null, - .rent = txn.rent, .status = errors.err, .instruction_error = errors.instruction_error, @@ -963,7 +867,8 @@ fn serializeOutput( .custom_error = errors.custom_error, .return_data = return_data, - .resulting_state = resulting_state, + .modified_accounts = modified_accounts, + .rollback_accounts = rollback_accounts, .fee_details = .{ .transaction_fee = txn.fees.transaction_fee, .prioritization_fee = txn.fees.prioritization_fee, @@ -1006,19 +911,13 @@ fn parsePubkey(bytes: []const u8) !Pubkey { return .{ .data = bytes[0..Pubkey.SIZE].* }; } -/// [agave] https://github.com/firedancer-io/solfuzz-agave/blob/agave-v3.1.0-beta.0/src/txn_fuzzer.rs#L319-L323 -fn loadSlot(txn_ctx: *const pb.TxnContext) u64 { - const slot = if (txn_ctx.slot_ctx) |ctx| ctx.slot else return 10; - return if (slot == 0) 10 else slot; -} - /// Load blockhashes from the protobuf transaction context. /// If no blockhashes are provided, a default blockhash of zeroes is returned. 
fn loadBlockhashes(
     allocator: std.mem.Allocator,
     pb_txn_ctx: *const pb.TxnContext,
 ) ![]Hash {
-    const pb_blockhashes = pb_txn_ctx.blockhash_queue.items;
+    const pb_blockhashes = pb_txn_ctx.bank.?.blockhash_queue.items;
 
     if (pb_blockhashes.len == 0)
         return try allocator.dupe(Hash, &.{Hash.ZEROES});
@@ -1026,7 +925,7 @@ fn loadBlockhashes(
     errdefer allocator.free(blockhashes);
 
     for (blockhashes, pb_blockhashes) |*blockhash, pb_blockhash|
-        blockhash.* = try parseHash(pb_blockhash);
+        blockhash.* = try parseHash(pb_blockhash.blockhash);
 
     return blockhashes;
 }
diff --git a/conformance/src/utils.zig b/conformance/src/utils.zig
index 435ca8f9cb..5f13575cf7 100644
--- a/conformance/src/utils.zig
+++ b/conformance/src/utils.zig
@@ -106,7 +106,7 @@ pub fn createTransactionContext(
     const log_collector = try sig.runtime.LogCollector.default(allocator);
     errdefer log_collector.deinit(allocator);
 
-    const slot = if (instr_ctx.slot_context) |slot_ctx| slot_ctx.slot else 0;
+    const slot = 0;
     tc.* = TransactionContext{
         .allocator = allocator,
         .programs_allocator = allocator,
@@ -173,17 +173,16 @@ pub fn deinitTransactionContext(
 }
 
 pub fn loadFeatureSet(ctx: anytype) !FeatureSet {
-    const epoch_context = switch (@TypeOf(ctx)) {
-        *pb.TxnContext, *const pb.TxnContext => ctx.epoch_ctx,
-        pb.InstrContext => ctx.epoch_context,
+    const pb_features = switch (@TypeOf(ctx)) {
+        *pb.TxnContext, *const pb.TxnContext => (ctx.bank orelse return .ALL_DISABLED).features,
+        pb.InstrContext => ctx.features,
         else => comptime unreachable,
     } orelse return .ALL_DISABLED;
-    const pb_features = epoch_context.features orelse return .ALL_DISABLED;
 
     var feature_set: FeatureSet = .ALL_DISABLED;
     for (pb_features.features.items) |id| {
         // only way for `setSlotId` to fail is if the `id` doesn't exist.
-        feature_set.setSlotId(id, 0) catch std.debug.panic("unknown id: 0x{x}", .{id});
+        feature_set.setSlotId(id, 0) catch std.debug.print("unknown feature id: 0x{x}\n", .{id});
     }
     return feature_set;
 }
diff --git a/conformance/src/vm_interp.zig b/conformance/src/vm_interp.zig
index b162897fc6..807d3162bd 100644
--- a/conformance/src/vm_interp.zig
+++ b/conformance/src/vm_interp.zig
@@ -55,7 +55,7 @@ fn executeVmTest(
     var instr_context = syscall_context.instr_ctx.?;
     const vm_context = syscall_context.vm_ctx.?;
 
-    const slot = if (instr_context.slot_context) |slot_ctx| slot_ctx.slot else 0;
+    const slot = 0;
 
     for (instr_context.accounts.items) |acc| {
         if (std.mem.eql(u8, acc.address, instr_context.program_id)) break;
diff --git a/docs/docusaurus/docs/code/conformance.md b/docs/docusaurus/docs/code/conformance.md
index 2d3cf7d049..407f63c68a 100644
--- a/docs/docusaurus/docs/code/conformance.md
+++ b/docs/docusaurus/docs/code/conformance.md
@@ -1,24 +1,83 @@
-Dependencies:
-- python 3.11
-- cargo/rust
-- zig 0.15.2
-- git 2.49
-- cmake
+# Dependencies
+
+Install [nix](https://nixos.org/download/) if you do not have it.
+
+```bash
+nix develop # if you only want to run the tests
+nix develop .#agave # if you also want to run the test vectors against agave
+```
+
+# Build
+
+To run the conformance tests, you'll need a build of solfuzz_sig. Either Debug or ReleaseSafe builds are fine.
 ```bash
-# set up the test environment
-scripts/setup-env.sh
-source env/pyvenv/bin/activate
+zig build solfuzz_sig
+```
 
-# compile the sig binary to test
-zig build -Doptimize=ReleaseSafe solfuzz_sig
 
-# run the conformance tests for the first time
-./run.py --create
+# Run
 
-# re-run conformance tests without re-creating the fixtures from scratch
-./run.py
+**Run the test vectors**
 
-# for more options
-./run.py --help
+```bash
+# run all conformance test vectors
+run
+
+# run only the transaction harness tests
+run txn
+```
+
+For customization, try `--help`.
+
+**Run the conformance CI job**
+
+```bash
+scripts/ci-run.sh
+```
+
+**Run solana-conformance directly**
+
+run.py is a helper script that makes this process easier, but when debugging it's sometimes useful to interact with solana-conformance directly. Here's an example of how to run the zk_sdk tests with solana-conformance:
+
+```bash
+solana-conformance \
+    exec-fixtures \
+    -i env/test-vectors/instr/fixtures/zk_sdk \
+    -t zig-out/lib/libsolfuzz_sig.so \
+    -o env/test-outputs/
 ```
+
+**Debug Agave**
+
+If you want to debug agave or manually generate fixtures from agave, you'll need to set up the agave environment with `nix develop .#agave`.
+
+You can edit any of the agave code in the env/ folder to debug agave. To run the conformance tests against this code, you'll need to compile solfuzz_agave:
+
+```bash
+cd env/solfuzz-agave
+cargo build --lib --release
+```
+
+Then you can run commands like these:
+
+```bash
+# run the tests against agave
+run --exec-lib env/solfuzz-agave/target/release/libsolfuzz_agave.so
+
+# create the fixtures based on agave, and run the conformance tests against sig
+run --create
+
+# re-run conformance tests against sig using your created fixtures
+run --use-created
+```
+
+# Interpreting results
+
+The expected and actual results for all tests are in env/test-outputs. You can use `parseout` to interpret the results.
+
+```bash
+parseout txn
+```
+
+See [parseout/README.md] for more info.
diff --git a/docs/docusaurus/docs/code/parseout.md b/docs/docusaurus/docs/code/parseout.md
new file mode 100644
index 0000000000..63d5b32f67
--- /dev/null
+++ b/docs/docusaurus/docs/code/parseout.md
@@ -0,0 +1,215 @@
+# parseout
+
+A parser and differ for the conformance test outputs. Useful for categorizing or
+interpreting a large number of failed tests. Compares expected and actual
+results, identifies mismatches, and categorizes them based on the types of
+mismatches.
+
+## Quick start
+
+```bash
+parseout diff expected.txt actual.txt
+```
+
+or for a smarter version that handles transaction harness failures better than `diff`:
+
+```bash
+parseout txn
+```
+
+This produces three output files and prints a summary
+to stdout:
+
+- **\*.csv:** one row per mismatch (`header, categories`), sorted so rows
+  with the same category combination are adjacent.
+- **\*-category.json:** mismatches grouped by individual category. A
+  mismatch appears under every category it belongs to.
+- **\*-combo.json:** mismatches grouped by their exact combination of
+  categories. Each mismatch appears under exactly one key.
+
+### Example stdout summary
+
+```
+1818 mismatches written to txn.csv, txn-category.json, txn-combo.json
+
+Categories:
+  result_type: 1194
+  status: 233
+  instruction_error: 312
+  ...
+
+Combinations:
+  result_type: 1194
+  modified_accounts+rollback_accounts: 185
+  instruction_error+status: 155
+  ...
+```
+
+**Categories** lists every individual category with the total number of
+mismatches it appears in (a mismatch can be counted under multiple
+categories).
+
+**Combinations** lists every unique set of categories as a single entry,
+with the count of mismatches that have exactly that combination.
+
+## Subcommands
+
+### `diff` -- generic diff
+
+```bash
+parseout diff <expected> <actual>
+```
+
+Uses the generic parser only. Recursively compares every field in each record
+and generates diff categories dynamically from the field path where differences
+occur. Category naming:
+
+- `field.subfield` -- values differ at that path
+- `field.0.subfield` -- values differ at list index 0, subfield
+- `field-missing` -- field exists in expected but not actual
+- `field-unexpected` -- field exists in actual but not expected
+- `field` (for lists) -- list lengths differ
+
+Outputs `diff.csv`, `diff-category.json`, `diff-combo.json` to the current
+directory.
+
+### `txn` -- transaction diff
+
+```bash
+parseout txn [expected] [actual]
+```
+
+A domain-specific differ for transaction conformance outputs.
+
+The generic differ makes a best effort at detecting differences in fields, but it
+doesn't encode any domain-specific knowledge. For the transaction harness, the
+mismatch categories from `diff` can be confusing. That's where the `txn` command
+comes in handy.
+
+This parses both files into typed records with known fields (status, fee_details,
+modified_accounts, etc.) and compares them using a fixed set of categories
+tailored to transaction semantics.
+
+`<expected>` and `<actual>` are optional. If omitted, it will locate files in
+`env/test-outputs/txn/fixtures/` and place the outputs there as well.
+
+Outputs `txn.csv`, `txn-category.json`, `txn-combo.json`.
+
+### `parse` -- generic parse
+
+```bash
+parseout parse <file>
+```
+
+Converts a single test output file from the protobuf text format into JSON
+using the generic parser. Prints to stdout. Useful if you'd like to interpret
+the results in a separate program that can parse JSON.
+
+## Architecture
+
+The parser is split into two layers. The layers are composable, and you can import
+them as a library into Python scripts for custom interpretations of test results.
+
+There are also two differs, one operating on each parsing layer.
+
+### Parse Layer 1: generic parser (`parseout.parser`)
+
+Parses the protobuf-text-like format into `OrderedDict[str, dict]`. Handles
+record headers, key-value pairs, nested `{ }` blocks, and record separators
+(lines of 20 hyphens). No regex -- uses only string operations.
+
+### Parse Layer 2: transaction parser (`parseout.transaction.parser`)
+
+Converts the generic dicts into a typed dataclass hierarchy:
+
+- **Record:** wraps a `test_id` and a result variant.
+- **SanitizationError:** the transaction failed before execution.
+- **ExecutedSuccess:** the transaction executed and succeeded.
+- **ExecutedError:** the transaction executed but failed.
+- **FeeDetails:** fee breakdown (transaction fee, prioritization fee).
+- **AccountEntry:** a modified or rollback account.
+
+The result variant is a tagged union discriminated by the `sanitization_error`,
+`executed`, and `is_ok` fields in the raw data.
+
+### Generic differ (`parseout.differ`) - Layer 1 diff
+
+Compares two parsed `OrderedDict[str, Block]` collections by shared headers.
+Recursively walks every field and generates categories from the path where
+values diverge. Works on any file the generic parser can read.
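+
+As a quick sketch of how these dynamically generated categories can be consumed
+from a script (this assumes only the `diff_files` helper and the `.categories`
+mismatch attribute shown in the Python API section below):
+
+```python
+# Tally how often each dynamically generated category appears,
+# mirroring the "Categories" section of the stdout summary.
+from collections import Counter
+
+from parseout import diff_files
+
+mismatches = diff_files("expected.txt", "actual.txt")
+
+# Each mismatch carries one or more categories, e.g.
+# "fee_details.transaction_fee" or "return_data-missing".
+counts = Counter(c for m in mismatches for c in m.categories)
+for category, n in counts.most_common():
+    print(f"{category}: {n}")
+```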
+
+### Transaction differ (`parseout.transaction.differ`) - Layer 2 diff
+
+Compares two parsed `OrderedDict[str, Record]` collections by shared test IDs.
+Each mismatch is tagged with one or more `Category` values from a fixed enum:
+
+| Category                    | Description                                     |
+| --------------------------- | ----------------------------------------------- |
+| `result_type`               | Different variant types (e.g. success vs error) |
+| `status`                    | Different status codes                          |
+| `instruction_error`         | Different instruction error codes               |
+| `instruction_error_index`   | Different instruction error indices             |
+| `custom_error`              | Different custom error codes                    |
+| `fee_details`               | Different fee breakdowns                        |
+| `executed_units`            | Different compute unit counts                   |
+| `loaded_accounts_data_size` | Different loaded account data sizes             |
+| `return_data`               | Different return data                           |
+| `modified_accounts`         | Different modified account lists                |
+| `rollback_accounts`         | Different rollback account lists                |
+
+When two records have different variant types, only `result_type` is reported.
+
+## Input format
+
+Each file contains records separated by lines of exactly 20 hyphens
+(`--------------------`). Each record starts with a header line (the test ID)
+followed by a colon, then key-value pairs and nested blocks:
+
+```
+some_test_id:
+executed: true
+is_ok: true
+executed_units: 27829
+fee_details {
+  transaction_fee: 15000
+  prioritization_fee: 500
+}
+modified_accounts {
+  address: "8fi2Typkf4m1z9miGfZQGRXDimBTVQqWHciMA9aZGXpN"
+  lamports: 10733753813112760225
+  owner: "11111111111111111111111111111111"
+}
+--------------------
+another_test_id:
+sanitization_error: true
+status: 9
+```
+
+## Tests
+
+```bash
+pytest
+```
+
+151 tests live inline in the source files (parser, generic differ, transaction
+parser, transaction differ).
+
+## Python API
+
+```python
+# Generic diff -- works on any file, dynamic categories
+from parseout import diff_files
+
+mismatches = diff_files("expected.txt", "actual.txt")
+for m in mismatches:
+    print(m.header, m.categories)
+
+# Transaction diff -- typed records, fixed categories.
+# (this diff_files shadows the generic one imported above;
+# Category is the fixed enum of mismatch categories)
+from parseout.transaction import parse_file, diff_files, Category
+
+# parse_file returns the typed records keyed by test ID, for direct inspection
+expected = parse_file("expected.txt")
+actual = parse_file("actual.txt")
+print(len(expected), len(actual))
+
+mismatches = diff_files("expected.txt", "actual.txt")
+for m in mismatches:
+    print(m.test_id, m.categories)
+```
diff --git a/docs/generate.py b/docs/generate.py
index 653ef9ea4b..2b0aaa8815 100644
--- a/docs/generate.py
+++ b/docs/generate.py
@@ -1,3 +1,4 @@
+import subprocess
 import os
 
 # get all the markdown files in the source repo
@@ -26,15 +27,24 @@ def get_markdown_files(
         # add all markdown files to list
         for file in files:
             if file.endswith(".md"):
-                dir_name = os.path.basename(root)
-                if dir_name == "" or dir_name == "." or dir_name == "..":
+                if file == "README.md" and root == src_path:
                     # this is the root readme.md -- we dont include
                     # it in the docs for now
                     continue
-
-                docs_path = os.path.join(code_dir_path, dir_name + ".md")
-                if dir_name == "metrics" :
+
+                if subprocess.run(
+                    ["git", "check-ignore", os.path.join(root, file)],
+                    stdout=subprocess.DEVNULL,
+                ).returncode == 0:
+                    continue
+
+                dir_name = os.path.basename(root)
+                if dir_name == "metrics":
                     docs_path = os.path.join(docs_dir_path, "usage/metrics.md")
+                elif file.lower() == "readme.md":
+                    docs_path = os.path.join(code_dir_path, dir_name + ".md")
+                else:
+                    docs_path = os.path.join(code_dir_path, dir_name + "_" + file)
 
                 doc_files.append([
                     # src/ path
@@ -45,22 +55,24 @@ def get_markdown_files(
 
     return doc_files
 
+
 # generate the docs from the source files
 if __name__ == "__main__":
     # point to the source sig/ repo
     # (should be run from the docs/ directory)
-    src_path = "../"
-    code_docs_path = "docusaurus/docs"
+    docs_dir = os.path.dirname(os.path.abspath(__file__))
+    repo_root = os.path.abspath(os.path.join(docs_dir, ".."))
+    code_docs_path = os.path.join(docs_dir, "docusaurus", "docs")
 
     # dirs which not to search
     exclude_dirs = [
-        src_path + "docs", # dont search yourself
-        src_path + "data", # this should only include data
-        src_path + "conformance/env", # other repositories
-        src_path + "workspace", # CI workspace (e.g. zig toolchain), not source
+        os.path.join(repo_root, "docs"), # dont search yourself
+        os.path.join(repo_root, "data"), # this should only include data
+        os.path.join(repo_root, "conformance/env"), # other repositories
+        os.path.join(repo_root, "workspace"), # CI workspace (e.g. zig toolchain), not source
     ]
 
-    for src_path, docs_path in get_markdown_files(src_path, exclude_dirs, code_docs_path):
+    for src_path, docs_path in get_markdown_files(repo_root, exclude_dirs, code_docs_path):
         print(src_path)
         # copy the file to the docs/code directory
         with open(src_path, "r") as f:
diff --git a/src/core/features.zon b/src/core/features.zon
index a1293008c4..6dd4f40a3a 100644
--- a/src/core/features.zon
+++ b/src/core/features.zon
@@ -207,7 +207,7 @@
     .{ .name = "reenable_sbpf_v0_execution", .pubkey = "TestFeature21111111111111111111111111111111" },
     .{ .name = "enable_sbpf_v1_deployment_and_execution", .pubkey = "JE86WkYvTrzW8HgNmrHY7dFYpCmSptUpKupbo2AdQ9cG" },
     .{ .name = "enable_sbpf_v2_deployment_and_execution", .pubkey = "F6UVKh1ujTEFK3en2SyAL3cdVnqko1FVEXWhmdLRu6WP" },
-    .{ .name = "enable_sbpf_v3_deployment_and_execution", .pubkey = "BUwGLeF3Lxyfv1J1wY8biFHBB2hrk2QhbNftQf3VV3cC" },
+    .{ .name = "enable_sbpf_v3_deployment_and_execution", .pubkey = "5cC3foj77CWun58pC51ebHFUWavHWKarWyR5UUik7dnC" },
     .{ .name = "migrate_feature_gate_program_to_core_bpf", .pubkey = "4eohviozzEeivk1y9UbrnekbAFMDQyJz5JjA9Y6gyvky", .activated_on_all_clusters = true },
     .{ .name = "migrate_config_program_to_core_bpf", .pubkey = "2Fr57nzzkLYXW695UdDxDeR5fhnZWSttZeZYemrnpGFV", .activated_on_all_clusters = true },
     .{ .name = "migrate_address_lookup_table_program_to_core_bpf", .pubkey = "C97eKZygrkU4JxJsZdjgbUY7iQR7rKTr4NyDWo2E5pRm", .activated_on_all_clusters = true },
@@ -235,7 +235,7 @@
     .{ .name = "vote_only_retransmitter_signed_fec_sets", .pubkey = "RfEcA95xnhuwooVAhUUksEJLZBF7xKCLuqrJoqk4Zph" },
     .{ .name = "mask_out_rent_epoch_in_vm_serialization", .pubkey = "RENtePQcDLrAbxAsP3k8dwVcnNYQ466hi2uKvALjnXx" },
     .{ .name = "disable_zk_elgamal_proof_program", .pubkey = "zkdoVwnSFnSLtGJG7irJPEYUpmb4i7sGMGcnN6T9rnC" },
-    .{ .name = "reenable_zk_elgamal_proof_program", .pubkey = "zkesAyFB19sTkX8i9ReoKaMNDA4YNTPYJpZKPDt7FMW" },
+    .{ .name = "reenable_zk_elgamal_proof_program", .pubkey = "zkexuyPRdyTVbZqEAREueqL2xvvoBhRgth9xGSc1tMN" },
     .{ .name = "formalize_loaded_transaction_data_size", .pubkey = "DeS7sR48ZcFTUmt5FFEVDr1v1bh73aAbZiZq3SYr8Eh8" },
     .{ .name = "enable_extend_program_checked", .pubkey = "2oMRZEDWT2tqtYMofhmmfQ8SsjqUFzT6sYXppQDavxwz" },
     .{ .name = "require_static_nonce_account", .pubkey = "7VVhpg5oAjAmnmz1zCcSHb2Z9ecZB2FQqpnEwReka9Zm" },
@@ -246,12 +246,35 @@
     .{ .name = "enforce_fixed_fec_set", .pubkey = "fixfecLZYMfkGzwq6NJA11Yw6KYztzXiK9QcL3K78in" },
     .{ .name = "deprecate_rent_exemption_threshold", .pubkey = "rent6iVy6PDoViPBeJ6k5EJQrkj62h7DPyLbWGHwjrC" },
     .{ .name = "static_instruction_limit", .pubkey = "64ixypL1HPu8WtJhNSMb9mSgfFaJvsANuRkTbHyuLfnx" },
-    .{ .name = "account_data_direct_mapping", .pubkey = "DFN8MyKpQqFW31qczcahgnnxcAHQc6P94wtTEX5EP1RA" },
+    .{ .name = "account_data_direct_mapping", .pubkey = "CR3dVN2Yoo95Y96kLSTaziWDAQT2MNEpiWh5cqVq2pNE" },
     .{ .name = "provide_instruction_data_offset_in_vm_r2", .pubkey = "5xXZc66h4UdB6Yq7FzdBxBiRAFMMScMLwHxk2QZDaNZL" },
     .{ .name = "poseidon_enforce_padding", .pubkey = "poUdAqRXXsNmfqAZ6UqpjbeYgwBygbfQLEvWSqVhSnb" },
     .{ .name = "fix_alt_bn128_pairing_length_check", .pubkey = "bnYzodLwmybj7e1HAe98yZrdJTd7we69eMMLgCXqKZm" },
     .{ .name = "increase_cpi_account_info_limit", .pubkey = "H6iVbVaDZgDphcPbcZwc5LoznMPWQfnJ1AM7L1xzqvt5" },
     .{ .name = "vote_state_v4", .pubkey = "Gx4XFcrVMt4HUvPzTpTSVkdDVgcDSjKhDN1RqRS6KDuZ" },
+    .{ .name = "switch_to_chacha8_turbine", .pubkey = "CHaChatUnR3s6cPyPMMGNJa3VdQQ8PNH2JqdD4LpCKnB" },
     .{ .name = "enable_bls12_381_syscall", .pubkey = "b1sgUiJ3qu7hYm3tNDyyqZNQd6gLGJmJppnLNa93PCQ" },
     .{ .name = "remove_simple_vote_from_cost_model", .pubkey = "2GCrNXbzmt4xrwdcKS2RdsLzsgu4V5zHAemW57pcHT6a" },
+    .{ .name = "syscall_parameter_address_restrictions", .pubkey = "EDGMC5kxFxGk4ixsNkGt8bW7QL5hDMXnbwaZvYMwNfzF" },
+    .{ .name = "virtual_address_space_adjustments", .pubkey = "7VgiehxNxu53KdxgLspGQY8myE6f7UokaWa4jsGcaSz" },
+    .{ .name = "delay_commission_updates", .pubkey = "76dHtohc2s5dR3ahJyBxs7eJJVipFkaPdih9CLgTTb4B" },
+    .{ .name = "create_account_allow_prefund", .pubkey = "6sPDzwyARRExKH52LECxcGoqziH8G7SZofwuxi8Ja331" },
+    .{ .name = "alt_bn128_little_endian", .pubkey = "bn2oPgpkzQPT3tohMaAsMVGjhDmmDa4jCaVPqCFmtxM" },
+    .{ .name = "bls_pubkey_management_in_vote_account", .pubkey = "2uxQgtKa2ECHGs67Zdj7dgmzn2w9HiqhdcedwCWfYzzq" },
+    .{ .name = "enable_alt_bn128_g2_syscalls", .pubkey = "bn1hKNURMGQaQoEVxahcEAcqiX3NwRs6hgKKNSLeKxH" },
+    .{ .name = "commission_rate_in_basis_points", .pubkey = "CommissionRate1nBasisPoints1111111111111111" },
+    .{ .name = "custom_commission_collector", .pubkey = "CustomCommissionCo11ector111111111111111111" },
+    .{ .name = "set_lamports_per_byte_to_6333", .pubkey = "4a6f7o7iTcA8hRDCrPLkSatnt5Ykxiu36wo5p1Tt12wC" },
+    .{ .name = "set_lamports_per_byte_to_5080", .pubkey = "61BtM7BkDEE8Yq5fskEVAQT9mYA8qCejJWoLe5apqg81" },
+    .{ .name = "set_lamports_per_byte_to_2575", .pubkey = "Ftxb3ZKq7aNqgxDBbP7EonvR2RszZk9ctjdsTX38kQaz" },
+    .{ .name = "set_lamports_per_byte_to_1322", .pubkey = "GsUBNYNDPdMLHPD37TToHzrzcNcjpC9w5n1EcJk5iTaM" },
+    .{ .name = "set_lamports_per_byte_to_696", .pubkey = "mZdnRh9T2EbDNvqKjkCR3bvo5c816tJaojtE9Xs7iuY" },
+    .{ .name = "limit_instruction_accounts", .pubkey = "6aHuNsUmwSzCEMjrBzBCYaxHAyAcQBjVES92JigHBDuC" },
+    .{ .name = "block_revenue_sharing", .pubkey = "B1ockRevenueSharing111111111111111111111111" },
+    .{ .name = "vote_account_initialize_v2", .pubkey = "VoteAccount1nitia1izeV211111111111111111111" },
+    .{ .name = "validate_chained_block_id", .pubkey = "vbiddkDHTSHSvL8B21AetWvTBLxxUZ1FmU6DFjztyRn" },
+    .{ .name = "validator_admission_ticket", .pubkey = "VATtb1DepUwdPh5bFVasdtkbeDNsftZSRzr2aKpKWJA" },
+    // TODO(agave 4.1): This will be rekeyed before activation.
+    // https://github.com/anza-xyz/agave/blob/v4.0.0-beta.0/feature-set/src/lib.rs#L1129-L1149
+    .{ .name = "alpenglow", .pubkey = "mustRekeyVm2QHYB3JPefBiU4BY3Z6JkW2k3Scw5GWP" },
 }
diff --git a/src/replay/resolve_lookup.zig b/src/replay/resolve_lookup.zig
index dfef1c0180..dc0dffe21b 100644
--- a/src/replay/resolve_lookup.zig
+++ b/src/replay/resolve_lookup.zig
@@ -208,12 +208,11 @@ pub fn resolveTransaction(
 
     // construct instructions
     const instructions = try allocator.alloc(InstructionInfo, message.instructions.len);
-    errdefer {
-        for (instructions) |instr| instr.deinit(allocator);
-        allocator.free(instructions);
-    }
+    errdefer allocator.free(instructions);
+
+    for (message.instructions, instructions, 0..) |input_ix, *output_ix, output_i| {
+        errdefer for (instructions[0..output_i]) |prev_output_ix| prev_output_ix.deinit(allocator);
 
-    for (message.instructions, instructions) |input_ix, *output_ix| {
         var account_metas = InstructionInfo.AccountMetas{};
         errdefer account_metas.deinit(allocator);
diff --git a/src/runtime/executor.zig b/src/runtime/executor.zig
index e06eeefc45..3ad9aa691c 100644
--- a/src/runtime/executor.zig
+++ b/src/runtime/executor.zig
@@ -278,13 +278,13 @@ pub fn prepareCpiInstructionInfo(
         const index_in_callee_ptr = &dedupe_map[index_in_transaction];
 
         if (index_in_callee_ptr.* < deduped_account_metas.items.len) {
+            try deduped_account_metas.ensureUnusedCapacity(tc.allocator, 1);
             const prev = &deduped_account_metas.items[index_in_callee_ptr.*];
             prev.is_signer = prev.is_signer or account.is_signer;
             prev.is_writable = prev.is_writable or account.is_writable;
             std.debug.assert(prev.index_in_transaction < InstructionInfo.MAX_ACCOUNT_METAS);
 
-            const new = prev.*; // this avoids a bug caused by Parameter Reference Optimisation (PRO)
-            try deduped_account_metas.append(tc.allocator, new);
+            deduped_account_metas.addOneAssumeCapacity().* = prev.*; // this avoids a bug caused by Parameter Reference Optimisation (PRO)
         } else {
             index_in_callee_ptr.* = @intCast(deduped_account_metas.items.len);
             try deduped_account_metas.append(tc.allocator, .{
@@ -297,9 +297,8 @@ pub fn prepareCpiInstructionInfo(
     }
 
     for (deduped_account_metas.items, 0..) |*account_meta, index_in_instruction| {
-        std.debug.assert(account_meta.index_in_transaction < InstructionInfo.MAX_ACCOUNT_METAS);
-
         const index_in_callee = dedupe_map[account_meta.index_in_transaction];
+        std.debug.assert(account_meta.index_in_transaction < InstructionInfo.MAX_ACCOUNT_METAS);
 
         if (index_in_callee != index_in_instruction) {
             if (index_in_callee >= deduped_account_metas.items.len) return error.MissingAccount;
diff --git a/src/runtime/program/address_lookup_table/state.zig b/src/runtime/program/address_lookup_table/state.zig
index 2df2280f68..1191e13e4e 100644
--- a/src/runtime/program/address_lookup_table/state.zig
+++ b/src/runtime/program/address_lookup_table/state.zig
@@ -72,7 +72,7 @@ pub const LookupTableMeta = struct {
             return LookupTableStatus.Activated;
         }
         if (self.deactivation_slot == current_slot) {
-            return LookupTableStatus{ .Deactivating = .{ .remaining_blocks = MAX_ENTRIES } };
+            return LookupTableStatus{ .Deactivating = .{ .remaining_blocks = MAX_ENTRIES +| 1 } };
         }
         if (slot_hashes.getIndex(self.deactivation_slot)) |slot_hash_position| {
             return LookupTableStatus{
diff --git a/src/runtime/program/vote/execute.zig b/src/runtime/program/vote/execute.zig
index 30ebb8381d..b6609764d5 100644
--- a/src/runtime/program/vote/execute.zig
+++ b/src/runtime/program/vote/execute.zig
@@ -999,6 +999,12 @@ fn getVoteStateChecked(
 
     switch (target_version) {
         .v3 => {
+            // V0_23_5 is no longer supported. agave's
+            // VoteStateV3::deserialize_into_ptr returns InvalidAccountData for variant 0
+            // when compiled for target_os = "solana".
+            if (versioned_state == .v0_23_5) {
+                return InstructionError.InvalidAccountData;
+            }
             // Existing flow before v4 feature gate activation:
             // Deserialize as VoteStateVersions (converting during deserialization).
             // Some callsites deserialize without checking initialization status.
diff --git a/src/runtime/transaction_execution.zig b/src/runtime/transaction_execution.zig
index dee55e71de..c5c32d7866 100644
--- a/src/runtime/transaction_execution.zig
+++ b/src/runtime/transaction_execution.zig
@@ -199,6 +199,17 @@ pub fn loadAndExecuteTransaction(
         return .{ .err = .AccountLoadedTwice };
     }
 
+    // Compute budget sanitization must come before checkAge to match agave's
+    // check_age_and_compute_budget_limits ordering (agave v4.0).
+    const compute_budget_limits = switch (compute_budget_program.sanitize(
+        transaction.compute_budget_instruction_details,
+        env.feature_set,
+        env.slot,
+    )) {
+        .ok => |x| x,
+        .err => |e| return .{ .err = e },
+    };
+
     const maybe_nonce_info = switch (try sig.runtime.check_transactions.checkAge(
         tmp_allocator,
         transaction,
@@ -214,15 +225,6 @@ pub fn loadAndExecuteTransaction(
     var nonce_account_is_owned = true;
     defer if (nonce_account_is_owned) if (maybe_nonce_info) |n| tmp_allocator.free(n.account.data);
 
-    const compute_budget_limits = switch (compute_budget_program.sanitize(
-        transaction.compute_budget_instruction_details,
-        env.feature_set,
-        env.slot,
-    )) {
-        .ok => |x| x,
-        .err => |e| return .{ .err = e },
-    };
-
     if (sig.runtime.check_transactions.checkStatusCache(
         &transaction.msg_hash,
         &transaction.recent_blockhash,