-
Notifications
You must be signed in to change notification settings - Fork 0
/
ring-mtr.py
executable file
·318 lines (272 loc) · 10.5 KB
/
ring-mtr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#!/usr/bin/env python
# coding: utf-8
from argparse import ArgumentParser, RawTextHelpFormatter
import sys
import logging
from signal import signal, SIGTERM
import os
import random
import dns.resolver
from pssh.clients import ParallelSSHClient
# Configure the root logger with a stream handler that writes to stdout.
logging.basicConfig(stream=sys.stdout)
# Module-level logger used by the functions below; INFO and above are emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Prefix shared by every environment variable that can supply an option value
# (e.g. ENVVAR_PREFIX + "USER", ENVVAR_PREFIX + "ROOT").
ENVVAR_PREFIX = "RING_MTR_"
def terminate(*_):
    """Signal handler: log that SIGTERM was received, then exit with status 0.

    Whatever positional arguments the signal machinery passes are ignored.
    """
    logger.info("Received SIGTERM, exiting.")
    raise SystemExit(0)
def _build_parser():
    """Build the command-line parser.

    Defaults for --user, --root, --number, --cycles, --connect-timeout and
    --retries are read from ENVVAR_PREFIX-prefixed environment variables.
    The env-var fallbacks for --force, --ipv4 and --ipv6 are resolved by the
    caller after parsing.
    """
    parser = ArgumentParser(
        description="Perform a MTR towards and from a subset of NLNOG ring nodes",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument(
        "-u",
        "--user",
        default=os.environ.get(ENVVAR_PREFIX + "USER"),
        help="SSH user.\nRequired, can be set using the env var "
        + ENVVAR_PREFIX
        + "USER",
    )
    parser.add_argument(
        "-r",
        "--root",
        default=os.environ.get(ENVVAR_PREFIX + "ROOT"),
        help="Ring node to/from which all MTRs will be performed.\nRequired, can be set using the env var "
        + ENVVAR_PREFIX
        + "ROOT",
    )
    parser.add_argument(
        "-n",
        "--number",
        default=os.environ.get(ENVVAR_PREFIX + "NUMBER", 10),
        type=int,
        help="Number of ring nodes to randomly select (can be 0).\nDefaults to 10, can be set using the env var "
        + ENVVAR_PREFIX
        + "NUMBER",
    )
    parser.add_argument(
        "-f",
        "--force",
        nargs="*",
        help="Force some nodes (short hostnames) to be present in the list if they have not been selected already.\n"
        + "Can be set using the env var "
        + ENVVAR_PREFIX
        + "FORCE , in a comma-separated list.",
    )
    parser.add_argument(
        "-c",
        "--cycles",
        default=os.environ.get(ENVVAR_PREFIX + "CYCLES", 10),
        type=int,
        help="MTR report cycles.\nDefaults to 10, can be set using the env var "
        + ENVVAR_PREFIX
        + "CYCLES",
    )
    parser.add_argument(
        "--connect-timeout",
        default=os.environ.get(ENVVAR_PREFIX + "CONNECT_TIMEOUT", 30),
        type=int,
        help="Timeout, in seconds, when connecting to the nodes.\nDefaults to 30, can be set using the env var "
        + ENVVAR_PREFIX
        + "CONNECT_TIMEOUT",
    )
    parser.add_argument(
        "-4",
        "--ipv4",
        action="store_true",
        help="Force IPv4 MTRs. Mutually exclusive with --ipv6\nDefault to false, can be set by setting the env var "
        + ENVVAR_PREFIX
        + "FORCE_IPV4 to 'true'",
    )
    parser.add_argument(
        "-6",
        "--ipv6",
        action="store_true",
        help="Force IPv6 MTRs. Mutually exclusive with --ipv4\nDefault to false, can be set by setting the env var "
        + ENVVAR_PREFIX
        + "FORCE_IPV6 to 'true'",
    )
    parser.add_argument(
        "--retries",
        default=os.environ.get(ENVVAR_PREFIX + "CONNECT_RETRIES", 1),
        type=int,
        help="Number of retries when connecting to the nodes.\nDefaults to 1, can be set using the env var "
        + ENVVAR_PREFIX
        + "CONNECT_RETRIES",
    )
    return parser


def _exit_with_usage_error(parser, message):
    """Print the usage line and an error message to stderr, then exit with status 1."""
    parser.print_usage(sys.stderr)
    # The error goes to stderr like the usage line, so it never mixes with the report.
    print(f"{os.path.basename(__file__)}: error: {message}", file=sys.stderr)
    sys.exit(1)


def _print_mtr_report(heading, error_heading, host_output):
    """Print the outcome of one MTR run (one direction, one remote node).

    Args:
        heading: line announcing the direction and endpoints of the MTR.
        error_heading: line printed before the exception text, if any.
        host_output: per-host result object from ``run_command``; its
            ``exception``, ``stdout``, ``exit_code`` and ``stderr``
            attributes are read.
    """
    print(heading)
    print("")
    if host_output.exception is not None:
        print(error_heading)
        print(" " + str(host_output.exception))
        stdout_lines = []  # stdout would be None here, which is not iterable
    else:
        stdout_lines = host_output.stdout or []
    for line in stdout_lines:
        print(" " + line)
    if host_output.exit_code:
        print("")
        print("----")
        print(" Exit code was: " + str(host_output.exit_code))
        print(" StdErr:")
        for line in host_output.stderr or []:
            print(" " + line)
        print("----")
    print("")


def main():
    """Run MTRs between a root NLNOG ring node and a set of other ring nodes.

    Steps: parse options (command line first, environment variables as
    fallback), fetch the ring node list from DNS, pick a random subset plus
    any forced nodes, run the MTRs over SSH in both directions, and print
    one report per remote node.

    Raises:
        ValueError: if the root node or a forced node is not a known ring node.
        SystemExit: with status 1 on invalid or missing options.
    """
    signal(SIGTERM, terminate)

    parser = _build_parser()
    args = parser.parse_args()

    # Mandatory options (they may come from the environment, hence no required=True)
    if args.user is None:
        _exit_with_usage_error(
            parser,
            f"argument --user (or env var {ENVVAR_PREFIX}USER) is mandatory.",
        )
    if args.root is None:
        _exit_with_usage_error(
            parser,
            f"argument --root (or env var {ENVVAR_PREFIX}ROOT) is mandatory.",
        )
    # random.sample() rejects a negative size with an unhelpful ValueError
    if args.number < 0:
        _exit_with_usage_error(parser, "argument --number must not be negative.")

    # Forced nodes: fall back to the comma-separated env var, ignoring empty
    # entries and surrounding whitespace (an empty variable means "no forced node")
    if args.force is None:
        forced_from_env = os.environ.get(ENVVAR_PREFIX + "FORCE", "")
        args.force = [
            name.strip() for name in forced_from_env.split(",") if name.strip()
        ]

    # IP version: command-line flags win, the env vars are only a fallback
    if not args.ipv4 and os.environ.get(ENVVAR_PREFIX + "FORCE_IPV4", "") == "true":
        args.ipv4 = True
    if not args.ipv6 and os.environ.get(ENVVAR_PREFIX + "FORCE_IPV6", "") == "true":
        args.ipv6 = True
    if args.ipv4 and args.ipv6:
        _exit_with_usage_error(
            parser, "cannot force both IPv4 and IPv6 simultaneously."
        )
    if args.ipv4:
        mtr_ip_version_flag = "-4"
    elif args.ipv6:
        mtr_ip_version_flag = "-6"
    else:
        mtr_ip_version_flag = ""

    # Get all current ring nodes from DNS
    resolver = dns.resolver.Resolver()
    # Use cloudflare DNS to workaround funky resolvers for the TXT query
    resolver.nameservers = ["1.1.1.1", "1.0.0.1"]
    answers = resolver.resolve("ring.nlnog.net.", "TXT")
    nodes = " ".join([str(i).strip('"') for i in answers]).split()

    # 'nodes' will be shaped, so we keep an unaltered list of all ring nodes
    all_nodes = nodes.copy()

    # Remove root node from the list (and check it is a valid ring node)
    try:
        nodes.remove(args.root)
    except ValueError as exc:
        raise ValueError(
            f"The selected root, {args.root} is not a known ring node."
        ) from exc

    # Select a random subset, capped at the number of candidates so that an
    # oversized --number selects every node instead of raising ValueError
    sample_size = min(args.number, len(nodes))
    if sample_size < args.number:
        logger.warning(
            "Requested %s random nodes but only %s are available, selecting all of them.",
            args.number,
            sample_size,
        )
    nodes = random.sample(nodes, sample_size)

    # Manually add a forced subset (and check they are valid nodes)
    invalid_nodes = []
    for forced_node in args.force:
        if forced_node not in all_nodes:
            invalid_nodes.append(forced_node)
        elif forced_node not in nodes:  # Add if not already present, or do nothing
            nodes.append(forced_node)
    if invalid_nodes:
        raise ValueError(f"Forced nodes {invalid_nodes} are not known ring nodes.")

    # Let's go
    logger.info(
        (
            "Performing bidirectionnal MTRs %s with %s cycles between '%s' and the following nodes : %s "
            "using user '%s', %ss of connect timeout and %s connect retries."
        ),
        mtr_ip_version_flag,
        args.cycles,
        args.root,
        nodes,
        args.user,
        args.connect_timeout,
        args.retries,
    )

    # Add the DNS suffix
    nodes = [n + ".ring.nlnog.net" for n in nodes]

    # Perform a dummy connection to the root node (but it could be any node)
    # to check that the auth is OK (username, SSH key).
    # Otherwise we run the risk of running multiple connection attempts with wrong auth
    # and getting fail2ban'ed. For now we let the AuthenticationError exception raise all the way.
    test_client = ParallelSSHClient(
        [args.root + ".ring.nlnog.net"],
        user=args.user,
        timeout=args.connect_timeout,
        num_retries=args.retries,
    )
    test_client.run_command("uname")
    test_client.join()

    # "Inbound" are the MTRs from each of the remote node towards the root node,
    # and "outbound" are the MTRs from the root node towards each of the remote nodes

    # Run one MTR on each remote node towards our root node
    inbound_client = ParallelSSHClient(
        nodes, user=args.user, timeout=args.connect_timeout, num_retries=args.retries
    )
    inbound_output = inbound_client.run_command(
        f"mtr {mtr_ip_version_flag} -c {args.cycles} -w -z -b {args.root}.ring.nlnog.net",
        stop_on_errors=False,
    )

    # Run multiple MTRs on our root node towards each of the remote node
    outbound_client = ParallelSSHClient(
        [args.root + ".ring.nlnog.net"] * len(nodes),
        user=args.user,
        pool_size=20,  # Limit the concurrent connections, since they are all on the same host
        timeout=args.connect_timeout,
        num_retries=args.retries,
    )
    outbound_output = outbound_client.run_command(
        f"mtr {mtr_ip_version_flag} -c {args.cycles} -w -z -b %s",
        host_args=nodes,
        stop_on_errors=False,
    )

    # Wait for all remote commands to terminate
    inbound_client.join()
    outbound_client.join()

    # Index the results per remote node (source for "inbound" MTRs, and target for "outbound" MTRs)
    inbound_results = {o.host: o for o in inbound_output}
    # Every outbound output has the root node as host, so the target can only be
    # recovered by position: this relies on the outputs being in 'nodes' order
    outbound_results = dict(zip(nodes, outbound_output))

    # This thing is a big old printer
    print()
    for host, inbound_host_output in inbound_results.items():
        outbound_host_output = outbound_results[host]
        print("-------------------------")
        print("Node: " + host)
        print("")
        # MTR from the remote node to the root node (inbound)
        _print_mtr_report(
            f"MTR inbound : from {host} to {args.root}.ring.nlnog.net :",
            " Encountered an error for inbound MTR:",
            inbound_host_output,
        )
        # MTR from the root node to the remote node (outbound)
        _print_mtr_report(
            f"MTR outbound : from {args.root}.ring.nlnog.net to {host} :",
            " Encountered an error for outbound MTR :",
            outbound_host_output,
        )
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()