|
| 1 | +#!/usr/bin/env python |
| 2 | +# |
| 3 | +# usage: cmp-state-dumps [-h] [-d] |
| 4 | +# Compare all files in the state_dumps directory and outputs a summary |
| 5 | +# options: |
| 6 | +# -h, --help show this help message and exit |
| 7 | +# -d, --delete removes matching files |
| 8 | +# |
| 9 | +# Uses a pool of worker processes that compare each state dump. |
| 10 | +# Possible improvement: use a pool of workers for file removal. |
| 11 | + |
| 12 | +import argparse |
| 13 | +import glob |
| 14 | +import re |
| 15 | +import multiprocessing as mp |
| 16 | +import os |
| 17 | +from collections import defaultdict |
| 18 | + |
# Number of worker processes handed to multiprocessing.Pool.
POOL_SIZE = 16

# Directory layout: <STATE_DUMPS_PATH>/<VM_DIRECTORY|NATIVE_DIRECTORY>/<block dir>/<tx>.json
STATE_DUMPS_PATH = "state_dumps"
VM_DIRECTORY = "vm"
NATIVE_DIRECTORY = "native"

# Append-only log of the "<block> <tx>" pairs whose matching dumps were deleted.
LOG_PATH = f"{STATE_DUMPS_PATH}/matching.log"
| 26 | + |
| 27 | + |
def _native_path_for(vm_dump_path: str) -> str:
    """Return the native dump path that mirrors ``vm_dump_path``."""
    block_dir, file_name = os.path.split(vm_dump_path)
    vm_root, block_name = os.path.split(block_dir)
    parent, vm_name = os.path.split(vm_root)
    if vm_name == VM_DIRECTORY:
        # Swap only the directory component itself, so an earlier path
        # component that merely contains the same letters is left untouched.
        return os.path.join(parent, NATIVE_DIRECTORY, block_name, file_name)
    # Unexpected layout: fall back to swapping the first literal occurrence.
    return vm_dump_path.replace(VM_DIRECTORY, NATIVE_DIRECTORY, 1)


def compare(vm_dump_path: str) -> tuple:
    """Compare one VM state dump against its native counterpart.

    The native dump path is derived from ``vm_dump_path`` by swapping the
    ``VM_DIRECTORY`` path component for ``NATIVE_DIRECTORY``. Before the two
    dumps are compared, the first line mentioning "reverted" is blanked out
    in each of them.

    Args:
        vm_dump_path: Path to the VM dump; it must contain ``block<digits>``
            and end in ``/0x<...>.json``.

    Returns:
        ``("MATCH", block, tx, vm_dump_path, native_dump_path)`` when the
        dumps are equal, ``("DIFF", block, tx)`` when they differ, and
        ``("MISS", block, tx)`` when either dump cannot be read.

    Raises:
        ValueError: If the transaction or block cannot be extracted from
            ``vm_dump_path``.
    """
    native_dump_path = _native_path_for(vm_dump_path)

    # The transaction is the file's base name: no "/" may appear inside it,
    # and the ".json" suffix must be literal and final.
    if not (m := re.search(r"/(0x[^/]*)\.json$", vm_dump_path)):
        raise ValueError("bad path")
    tx = m.group(1)

    if not (m := re.search(r"block(\d+)", vm_dump_path)):
        raise ValueError("bad path")
    block = m.group(1)

    try:
        with open(native_dump_path) as f:
            native_dump = f.read()
        with open(vm_dump_path) as f:
            vm_dump = f.read()
    except (OSError, UnicodeError):
        # A missing or unreadable dump is reported rather than raised, but
        # KeyboardInterrupt/SystemExit in a worker must still propagate.
        return ("MISS", block, tx)

    # Blank out the first "reverted" line in each dump so it does not take
    # part in the comparison.
    native_dump = re.sub(r".*reverted.*", "", native_dump, count=1)
    vm_dump = re.sub(r".*reverted.*", "", vm_dump, count=1)

    if native_dump == vm_dump:
        return ("MATCH", block, tx, vm_dump_path, native_dump_path)
    return ("DIFF", block, tx)
| 54 | + |
| 55 | + |
def main() -> int:
    """Compare every VM state dump with its native counterpart.

    Globs ``STATE_DUMPS_PATH/VM_DIRECTORY/*/*.json``, compares the files in
    modification-time order using a pool of ``POOL_SIZE`` worker processes,
    prints every non-matching result, and finally prints a per-status count.
    With ``-d/--delete``, each matching pair is recorded in ``LOG_PATH`` and
    both files are removed.

    Returns:
        ``1`` if any dump differed or was missing, ``0`` otherwise.
    """
    import contextlib  # local import: only this entry point needs it

    parser = argparse.ArgumentParser(
        prog="cmp-state-dumps",
        description="Compare all files in the state_dumps directory and outputs a summary",
    )
    parser.add_argument(
        "-d", "--delete", action="store_true", help="removes matching files"
    )
    config = parser.parse_args()

    files = glob.glob(f"{STATE_DUMPS_PATH}/{VM_DIRECTORY}/*/*.json")
    files.sort(key=os.path.getmtime)

    print(f"Starting comparison with {POOL_SIZE} workers")

    # The log is only written when deleting, so it is only opened then; a
    # read-only run neither creates the file nor needs its directory.
    log_cm = open(LOG_PATH, mode="a") if config.delete else contextlib.nullcontext()

    stats = defaultdict(int)
    with mp.Pool(POOL_SIZE) as pool, log_cm as log:
        for status, *info in pool.imap(compare, files):
            stats[status] += 1

            if status != "MATCH":
                block, tx = info
                print(status, block, tx)
            elif config.delete:
                block, tx, vm_dump_path, native_dump_path = info

                # Record and flush the match before the files are removed.
                log.write(f"{block} {tx}\n")
                log.flush()
                os.remove(native_dump_path)
                os.remove(vm_dump_path)

    print("Finished comparison")

    print()
    for key, count in stats.items():
        print(key, count)

    return 1 if stats["DIFF"] != 0 or stats["MISS"] != 0 else 0


if __name__ == "__main__":
    # SystemExit instead of the site-provided exit(), which is meant for the
    # interactive interpreter and is not guaranteed to exist in programs.
    raise SystemExit(main())
0 commit comments