diff --git a/chia/consensus/blockchain.py b/chia/consensus/blockchain.py
index 8c0150127944..24704ad54158 100644
--- a/chia/consensus/blockchain.py
+++ b/chia/consensus/blockchain.py
@@ -735,6 +735,8 @@ def clean_block_record(self, height: int) -> None:
         Args:
             height: Minimum height that we need to keep in the cache
         """
+        if self._peak_height is not None and height > self._peak_height - self.constants.BLOCKS_CACHE_SIZE:
+            height = self._peak_height - self.constants.BLOCKS_CACHE_SIZE
         if height < 0:
             return None
         blocks_to_remove = self.__heights_in_cache.get(uint32(height), None)
diff --git a/chia/full_node/full_node.py b/chia/full_node/full_node.py
index 100a2f0b0e73..791e2564cdee 100644
--- a/chia/full_node/full_node.py
+++ b/chia/full_node/full_node.py
@@ -538,7 +538,8 @@ async def short_sync_batch(self, peer: WSChiaConnection, start_height: uint32, t
             )
             if first is None or not isinstance(first, full_node_protocol.RespondBlock):
                 self.sync_store.batch_syncing.remove(peer.peer_node_id)
-                raise ValueError(f"Error short batch syncing, could not fetch block at height {start_height}")
+                self.log.error(f"Error short batch syncing, could not fetch block at height {start_height}")
+                return False
             if not self.blockchain.contains_block(first.block.prev_header_hash):
                 self.log.info("Batch syncing stopped, this is a deep chain")
                 self.sync_store.batch_syncing.remove(peer.peer_node_id)
@@ -716,6 +717,10 @@ async def new_peak(self, request: full_node_protocol.NewPeak, peer: WSChiaConnec
                 return None

             if request.height < curr_peak_height + self.config["sync_blocks_behind_threshold"]:
+                # TODO: We get here if we encountered a heavier peak with a
+                # lower height than ours. We don't seem to handle this case
+                # right now. This ends up requesting the block at *our* peak
+                # height.
                 # This case of being behind but not by so much
                 if await self.short_sync_batch(peer, uint32(max(curr_peak_height - 6, 0)), request.height):
                     return None
@@ -1110,6 +1115,11 @@ async def validate_block_batches(
                     self.subscriptions.has_ph_subscription,
                 )
                 await self.hint_store.add_hints(hints_to_add)
+                # Note that end_height is not necessarily the peak at this
+                # point. In case of a re-org, it may even be significantly
+                # higher than _peak_height, and still not be the peak.
+                # clean_block_record() will not necessarily honor this cut-off
+                # height, in that case.
                 self.blockchain.clean_block_record(end_height - self.constants.BLOCKS_CACHE_SIZE)

         batch_queue_input: asyncio.Queue[Optional[Tuple[WSChiaConnection, List[FullBlock]]]] = asyncio.Queue(
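The clamp added to clean_block_record() pairs with the note in validate_block_batches(): even if a re-org hands the cleaner an end_height above the actual peak, the most recent BLOCKS_CACHE_SIZE blocks below the peak stay cached. Below is a minimal self-contained sketch of that invariant; BlockCacheSketch and its fields are illustrative stand-ins, not the real Blockchain class.

# Sketch of the pruning invariant established by the clamp above.
from typing import Dict, List, Optional


class BlockCacheSketch:
    def __init__(self, cache_size: int) -> None:
        self.cache_size = cache_size
        self.peak_height: Optional[int] = None
        # height -> header hashes of the blocks cached at that height
        self.heights_in_cache: Dict[int, List[bytes]] = {}

    def clean_block_record(self, height: int) -> None:
        # Never prune into the window of cache_size blocks below the peak,
        # even when the caller's cut-off (e.g. an end_height from a re-org
        # batch) overshoots the peak.
        if self.peak_height is not None and height > self.peak_height - self.cache_size:
            height = self.peak_height - self.cache_size
        if height < 0:
            return
        # Evict everything at the cut-off height and below.
        for h in [h for h in self.heights_in_cache if h <= height]:
            del self.heights_in_cache[h]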
diff --git a/chia/simulator/block_tools.py b/chia/simulator/block_tools.py
index 380f8310b07d..5555b7baebc4 100644
--- a/chia/simulator/block_tools.py
+++ b/chia/simulator/block_tools.py
@@ -153,7 +153,10 @@
     NUM_SPS_SUB_SLOT=uint32(16),  # Must be a power of 2
     MAX_SUB_SLOT_BLOCKS=uint32(50),
     EPOCH_BLOCKS=uint32(340),
-    BLOCKS_CACHE_SIZE=uint32(340 + 3 * 50),  # Coordinate with the above values
+    # the block cache must contain at least 3 epochs in order for
+    # create_prev_sub_epoch_segments() to have access to all the blocks it needs
+    # from the cache
+    BLOCKS_CACHE_SIZE=uint32(340 * 3),  # Coordinate with the above values
     SUB_SLOT_TIME_TARGET=600,  # The target number of seconds per slot, mainnet 600
     SUB_SLOT_ITERS_STARTING=uint64(2**10),  # Must be a multiple of 64
     NUMBER_ZERO_BITS_PLOT_FILTER=1,  # H(plot signature of the challenge) must start with these many zeroes
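The arithmetic behind the constant change: with EPOCH_BLOCKS at 340, the cache grows from 490 blocks (one epoch plus three max-length sub-slots) to 1020 blocks (three full epochs). A purely illustrative check, using only the values visible in the diff:

# Cache sizing under the simulator test constants.
EPOCH_BLOCKS = 340
MAX_SUB_SLOT_BLOCKS = 50

old_cache_size = 340 + 3 * 50  # 490: one epoch plus three max-length sub-slots
new_cache_size = 340 * 3       # 1020: three full epochs

assert new_cache_size == 3 * EPOCH_BLOCKS
print(old_cache_size, new_cache_size)  # 490 1020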
diff --git a/tests/core/full_node/test_full_node.py b/tests/core/full_node/test_full_node.py
index 8112da6b14ba..55cb3dad25fb 100644
--- a/tests/core/full_node/test_full_node.py
+++ b/tests/core/full_node/test_full_node.py
@@ -6,6 +6,7 @@
 import logging
 import random
 import time
+import traceback
 from typing import Coroutine, Dict, List, Optional, Tuple

 import pytest
@@ -2150,3 +2151,91 @@ async def test_wallet_sync_task_failure(
     assert "update_wallets - fork_height: 10, peak_height: 0" in caplog.text
     assert "Wallet sync task failure" not in caplog.text
     assert not full_node.wallet_sync_task.done()
+
+
+@pytest.mark.anyio
+@pytest.mark.limit_consensus_modes(reason="this test is too slow (for now)")
+async def test_long_reorg_nodes(
+    three_nodes,
+    default_10000_blocks: List[FullBlock],
+    test_long_reorg_blocks: List[FullBlock],
+    self_hostname: str,
+    seeded_random: random.Random,
+):
+    full_node_1, full_node_2, full_node_3 = three_nodes
+
+    blocks = default_10000_blocks[:1600]
+
+    reorg_blocks = test_long_reorg_blocks[:1200]
+
+    # full node 1 has the original chain
+    for block_batch in to_batches(blocks, 64):
+        b = block_batch.entries[0]
+        if (b.height % 128) == 0:
+            print(f"main chain: {b.height:4} weight: {b.weight}")
+        await full_node_1.full_node.add_block_batch(block_batch.entries, PeerInfo("0.0.0.0", 8884), None)
+
+    # full node 2 has the reorg-chain
+    for block_batch in to_batches(reorg_blocks[:-1], 64):
+        b = block_batch.entries[0]
+        if (b.height % 128) == 0:
+            print(f"reorg chain: {b.height:4} weight: {b.weight}")
+        await full_node_2.full_node.add_block_batch(block_batch.entries, PeerInfo("0.0.0.0", 8884), None)
+
+    await connect_and_get_peer(full_node_1.full_node.server, full_node_2.full_node.server, self_hostname)
+
+    # TODO: There appears to be an issue where the node with the lighter chain
+    # fails to initiate the reorg until there's a new block farmed onto the
+    # heavier chain.
+    await full_node_2.full_node.add_block(reorg_blocks[-1])
+
+    def check_nodes_in_sync():
+        try:
+            p1 = full_node_1.full_node.blockchain.get_peak()
+            p2 = full_node_2.full_node.blockchain.get_peak()
+            return p1 == p2
+        except Exception as e:
+            # TODO: understand why we get an exception here sometimes. Fix it,
+            # or add a comment explaining why we need to catch here.
+            traceback.print_exc()
+            print(f"e: {e}")
+            return False
+
+    await time_out_assert(120, check_nodes_in_sync)
+    peak = full_node_2.full_node.blockchain.get_peak()
+    print(f"peak: {str(peak.header_hash)[:6]}")
+
+    p1 = full_node_1.full_node.blockchain.get_peak()
+    p2 = full_node_2.full_node.blockchain.get_peak()
+
+    assert p1.header_hash == reorg_blocks[-1].header_hash
+    assert p2.header_hash == reorg_blocks[-1].header_hash
+
+    blocks = default_10000_blocks[:4000]
+
+    # full node 3 has the original chain, but even longer
+    for block_batch in to_batches(blocks, 64):
+        b = block_batch.entries[0]
+        if (b.height % 128) == 0:
+            print(f"main chain: {b.height:4} weight: {b.weight}")
+        await full_node_3.full_node.add_block_batch(block_batch.entries, PeerInfo("0.0.0.0", 8884), None)
+
+    print("connecting node 3")
+    await connect_and_get_peer(full_node_3.full_node.server, full_node_1.full_node.server, self_hostname)
+    # await connect_and_get_peer(full_node_3.full_node.server, full_node_2.full_node.server, self_hostname)
+
+    def check_nodes_in_sync2():
+        p1 = full_node_1.full_node.blockchain.get_peak()
+        # p2 = full_node_2.full_node.blockchain.get_peak()
+        p3 = full_node_3.full_node.blockchain.get_peak()
+        return p1 == p3
+
+    await time_out_assert(950, check_nodes_in_sync2)
+
+    p1 = full_node_1.full_node.blockchain.get_peak()
+    # p2 = full_node_2.full_node.blockchain.get_peak()
+    p3 = full_node_3.full_node.blockchain.get_peak()
+
+    assert p1.header_hash == blocks[-1].header_hash
+    # assert p2.header_hash == blocks[-1].header_hash
+    assert p3.header_hash == blocks[-1].header_hash
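For context on what test_long_reorg_nodes exercises: peak selection follows accumulated weight, not height, which is why node 1 abandons its 1600-block chain for the heavier 1200-block reorg chain, and why the TODO in new_peak() about a heavier-but-lower peak matters. A toy sketch of that selection rule follows; it is not chia's actual peak logic, just the weight-over-height principle the test relies on.

# Toy illustration of heaviest-chain selection.
from dataclasses import dataclass


@dataclass
class PeakSketch:
    height: int
    weight: int  # accumulated difficulty, not block count


def pick_peak(current: PeakSketch, candidate: PeakSketch) -> PeakSketch:
    # A re-org is triggered by higher *weight*, even at a lower height; this
    # is the case the TODO in new_peak() above calls out.
    return candidate if candidate.weight > current.weight else current


ours = PeakSketch(height=1600, weight=9000)
theirs = PeakSketch(height=1200, weight=9500)  # shorter but heavier
assert pick_peak(ours, theirs) is theirs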