1
+ import os
1
2
import asyncio
2
3
from exo .networking .discovery import Discovery
3
- from typing import Dict , List , Callable
4
+ from typing import Dict , List , Callable , Optional
4
5
5
6
from exo .topology .device_capabilities import DeviceCapabilities
6
7
from exo .networking .manual .network_topology_config import NetworkTopology , PeerConfig
@@ -19,14 +20,19 @@ def __init__(
19
20
self .node_id = node_id
20
21
self .create_peer_handle = create_peer_handle
21
22
self .listen_task = None
23
+ self .cleanup_task = None
22
24
self .known_peers : Dict [str , PeerHandle ] = {}
23
25
26
+ self ._cached_peers : Dict [str , PeerConfig ] = {}
27
+ self ._last_modified_time : Optional [float ] = None
28
+
24
29
async def start(self) -> None:
    """Launch the two background tasks that keep the peer set in sync with the config.

    The task handles are stored on the instance (``listen_task`` and
    ``cleanup_task``) so that ``stop()`` can cancel them later.
    """
    spawn = asyncio.create_task
    # Discovery task: finds peers listed in the config file.
    self.listen_task = spawn(self.task_find_peers_from_config())
    # Cleanup task: drops known peers that have disappeared from the config file.
    self.cleanup_task = spawn(self.task_clean_up_peers_from_config())
26
32
27
33
async def stop(self) -> None:
    """Cancel the background tasks and wait until they have actually finished.

    Only requesting cancellation is not enough: ``Task.cancel()`` merely
    schedules a ``CancelledError`` for the task's next step. Awaiting the
    tasks guarantees they are done before ``stop()`` returns, which avoids
    "Task was destroyed but it is pending" warnings at loop shutdown.
    """
    # Either handle may still be None if start() was never called.
    pending = [task for task in (self.listen_task, self.cleanup_task) if task]
    for task in pending:
        task.cancel()
    if pending:
        # return_exceptions=True collects the expected CancelledError (or any
        # error a task had already failed with) instead of raising it here.
        await asyncio.gather(*pending, return_exceptions=True)
30
36
31
37
async def discover_peers (self , wait_for_peers : int = 0 ) -> List [PeerHandle ]:
32
38
if wait_for_peers > 0 :
@@ -36,6 +42,19 @@ async def discover_peers(self, wait_for_peers: int = 0) -> List[PeerHandle]:
36
42
if DEBUG_DISCOVERY >= 2 : print (f"Discovered peers: { [peer .id () for peer in self .known_peers .values ()]} " )
37
43
return list (self .known_peers .values ())
38
44
45
async def task_clean_up_peers_from_config(self):
    """Periodically drop known peers that are no longer listed in the network config.

    Loops until the task is cancelled, re-checking the config every 5 seconds.
    """
    if DEBUG_DISCOVERY >= 2: print("Starting task to clean up peers from config...")
    while True:
        # Keep the mapping itself (not its .items() view): membership on an
        # items view compares (key, value) tuples, so a bare peer id would never
        # match and every known peer would be removed on each pass.
        # NOTE(review): assumes known_peers is keyed by the same ids as the
        # config's peers mapping -- confirm against task_find_peers_from_config.
        peers_from_config = self._get_peers()

        # An empty result is skipped on purpose: _get_peers() also returns {}
        # when the config cannot be loaded, and that must not wipe known peers.
        if peers_from_config:
            # Build the removal list first so the dict is not mutated while iterating it.
            peers_to_remove = [peer for peer in self.known_peers if peer not in peers_from_config]

            for peer in peers_to_remove:
                if DEBUG_DISCOVERY >= 2: print(f"{peer} is no longer found in the config but is currently a known peer. Removing from known peers...")
                # pop() with a default tolerates the peer having been removed
                # concurrently by another task while this one was running.
                self.known_peers.pop(peer, None)

        await asyncio.sleep(5.0)
39
58
40
59
async def task_find_peers_from_config (self ):
41
60
if DEBUG_DISCOVERY >= 2 : print ("Starting task to find peers from config..." )
@@ -56,23 +75,37 @@ async def task_find_peers_from_config(self):
56
75
try : del self .known_peers [peer_id ]
57
76
except KeyError : pass
58
77
except Exception as e :
59
- if DEBUG_DISCOVERY >= 2 : print (f"Exception occured when attempting to add { peer_id = } : { e } " )
78
+ if DEBUG_DISCOVERY >= 2 : print (f"Exception occured when attempting to add { peer_id = } : { e } " )
60
79
await asyncio .sleep (1.0 )
61
80
62
81
if DEBUG_DISCOVERY >= 2 : print (f"Current known peers: { [peer .id () for peer in self .known_peers .values ()]} " )
63
82
64
83
def _get_peers (self ):
65
84
try :
66
- topology = NetworkTopology . from_path (self .network_config_path )
85
+ current_mtime = os . path . getmtime (self .network_config_path )
67
86
68
- if self .node_id not in topology . peers :
69
- raise ValueError ( f"Node ID { self .node_id } not found in network config file { self . network_config_path } . Please run with `node_id` set to one of the keys in the config file: { [ k for k , _ in topology . peers ] } " )
87
+ if self . _cached_peers is not None and self ._last_modified_time is not None and current_mtime <= self . _last_modified_time :
88
+ return self ._cached_peers
70
89
71
- peers_in_network : Dict [str , PeerConfig ] = topology .peers
72
- peers_in_network .pop (self .node_id )
73
- except Exception as e :
74
- if DEBUG_DISCOVERY >= 2 : print (f"Error when loading network config file from { self .network_config_path } . Please update the config file in order to successfully discover peers. Exception: { e } " )
75
- peers_in_network = {}
90
+ topology = NetworkTopology .from_path (self .network_config_path )
76
91
77
- return peers_in_network
92
+ if self .node_id not in topology .peers :
93
+ raise ValueError (
94
+ f"Node ID { self .node_id } not found in network config file "
95
+ f"{ self .network_config_path } . Please run with `node_id` set to "
96
+ f"one of the keys in the config file: { [k for k , _ in topology .peers ]} "
97
+ )
78
98
99
+ peers_in_network : Dict [str , PeerConfig ] = topology .peers
100
+ peers_in_network .pop (self .node_id )
101
+
102
+ self ._cached_peers = peers_in_network
103
+ self ._last_modified_time = current_mtime
104
+
105
+ return peers_in_network
106
+
107
+ except Exception as e :
108
+ if DEBUG_DISCOVERY >= 2 : print (f"Error when loading network config file from { self .network_config_path } . Please update the config file in order to successfully discover peers. Exception: { e } " )
109
+ self ._cached_peers = {}
110
+ self ._last_modified_time = None
111
+ return {}
0 commit comments