From d862a4548109c7dbf2fbca78eaa43e35f0328d6d Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Thu, 2 Feb 2023 17:55:17 +0100 Subject: [PATCH 01/29] New script for running backend locally --- services/integration.sh | 2 +- services/run-services | 161 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) create mode 100755 services/run-services diff --git a/services/integration.sh b/services/integration.sh index 648a60aaba..a0ce189476 100755 --- a/services/integration.sh +++ b/services/integration.sh @@ -26,7 +26,7 @@ function list_descendants () { } function kill_gracefully() { - pkill "gundeck|brig|galley|cargohold|cannon|spar|nginz|stern" + pkill "federator|gundeck|brig|galley|cargohold|cannon|spar|nginz|stern" sleep 1 kill $(list_descendants "$PARENT_PID") &> /dev/null } diff --git a/services/run-services b/services/run-services new file mode 100755 index 0000000000..ccf86f446d --- /dev/null +++ b/services/run-services @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 + +from dataclasses import dataclass +import os +import select +import signal +import subprocess +import yaml +import urllib.request +import urllib.error +import sys +from threading import Thread + +class Colors: + GREEN = "\x1b[38;5;10m" + YELLOW = "\x1b[38;5;11m" + BLUE = "\x1b[38;5;6m" + PURPLEISH = "\x1b[38;5;13m" + ORANGE = "\x1b[38;5;3m" + RESET = "\x1b[0m" + +@dataclass(frozen=True) +class Service: + name: str + color: str + _internal_name: str = None + check_status: bool = True + + @property + def internal_name(self): + if self._internal_name is None: + return self.name + else: + return self._internal_name + + def path(self): + return os.path.join(ROOT, "dist", self.name) + + def config_file(self): + return os.path.join(ROOT, "services", self.name, + self.name + ".integration.yaml") + + def modified_config_file(self, service_map): + (r, w) = os.pipe() + + with open(self.config_file()) as f: + data = yaml.safe_load(f) + + for service, port in service_map.items(): + if service.internal_name in data: + data[service.internal_name]['port'] = port + + out = os.fdopen(w, 'w') + yaml.dump(data, out) + + return f"/proc/{os.getpid()}/fd/{r}" + +def check_prerequisites(): + pass + +def spawn(service, service_map, environment): + config_file = service.modified_config_file(service_map) + sub = subprocess.Popen([service.path(), "-c", config_file], + encoding='utf-8', + cwd=os.path.join(ROOT, "services", service.name), + env=environment, stdout=subprocess.PIPE) + + port = service_map[service] + if service.check_status: + while True: + sub.poll() + if sub.returncode is not None: + raise Exception(f"{service.name} has terminated") + try: + with urllib.request.urlopen(f"http://localhost:{port}/i/status") as resp: + if resp.status in [200, 204]: break + except urllib.error.URLError: + pass + + def colour_output(): + try: + for line in sub.stdout: + print(service.color, end='') + print(f"[{service.name}] ", end='') + print(line, end='') + print(Colors.RESET, end='') + finally: + sub.terminate() + sub.wait() + + t = Thread(target=colour_output) + t.start() + return t + +def find_root(base): + # find git repository + root = os.path.realpath(base) + while not os.path.exists(os.path.join(root, ".git")): + p = os.path.dirname(root) + if p == root: raise Exception("Could not find wire-server root") + root = p + return root + +if __name__ == '__main__': + ROOT = find_root(os.getcwd()) + if ROOT is None: + error("This script needs to be run within the wire-server direnv") + + environment = { + 'AWS_REGION': "eu-west-1", + 'AWS_ACCESS_KEY_ID': "dummykey", + 'AWS_SECRET_ACCESS_KEY': "dummysecret" + } + + brig = Service("brig", Colors.GREEN) + galley = Service("galley", Colors.YELLOW) + gundeck = Service("gundeck", Colors.BLUE) + cannon = Service("cannon", Colors.ORANGE) + cargohold = Service("cargohold", Colors.PURPLEISH) + spar = Service("spar", Colors.ORANGE) + federator = Service("federator", Colors.BLUE, + "federatorInternal", check_status=False) + stern = Service("stern", Colors.YELLOW) + + service_map = { + brig: 8082, + galley: 8085, + gundeck: 8086, + cannon: 8083, + cargohold: 8084, + spar: 8088, + federator: 8097 + } + + try: + # TODO: start services in parallel + for service in service_map.keys(): + spawn(service, service_map, environment) + + + # procs[proc.stdout] = { + # 'process': proc, + # 'service': service + # } + + # while True: + # (ready, _, _) = select.select([proc['process'].stdout + # for proc in procs.values()], [], []) + # for f in ready: + # data = f.read().decode('utf-8') + # col = procs[f]['service'].color + # print(col, end='') + # print(data, end='') + # print(Colors.RESET, end='') + # sys.stdout.flush() + except KeyboardInterrupt: + pass + # finally: + # for proc in procs.values(): + # proc['process'].terminate() + # proc['process'].wait() From 8b5ff16628567233eea6177104ce260470450ca2 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Fri, 3 Feb 2023 11:12:09 +0100 Subject: [PATCH 02/29] Add nginz to script --- services/run-services | 119 ++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 56 deletions(-) diff --git a/services/run-services b/services/run-services index ccf86f446d..f22aa13065 100755 --- a/services/run-services +++ b/services/run-services @@ -4,12 +4,14 @@ from dataclasses import dataclass import os import select import signal +import shutil import subprocess import yaml import urllib.request import urllib.error import sys from threading import Thread +import logging class Colors: GREEN = "\x1b[38;5;10m" @@ -55,42 +57,59 @@ class Service: return f"/proc/{os.getpid()}/fd/{r}" + def spawn(self, service_map, environment): + config_file = self.modified_config_file(service_map) + sub = subprocess.Popen([self.path(), "-c", config_file], + encoding='utf-8', + cwd=os.path.join(ROOT, "services", self.name), + env=environment, stdout=subprocess.PIPE) + + if self.check_status: + check_status(sub, service_map[self], self) + colour_output(sub, self) + +@dataclass(frozen=True) +class Nginz: + color: str + + @property + def name(self): return "nginz" + + @property + def internal_name(self): return self.name + + def spawn(self, service_map, environment): + cwd = os.path.join(ROOT, "services", "nginz", "integration-test") + sub = subprocess.Popen([shutil.which("nginx"), "-p", cwd, "-c", + os.path.join(cwd, "conf", "nginz","nginx.conf"), + "-g", "daemon off;"], + encoding='utf-8', cwd=cwd, env=environment, + stdout=subprocess.PIPE) + check_status(sub, service_map[self], self) + colour_output(sub, self) + def check_prerequisites(): pass -def spawn(service, service_map, environment): - config_file = service.modified_config_file(service_map) - sub = subprocess.Popen([service.path(), "-c", config_file], - encoding='utf-8', - cwd=os.path.join(ROOT, "services", service.name), - env=environment, stdout=subprocess.PIPE) - - port = service_map[service] - if service.check_status: - while True: - sub.poll() - if sub.returncode is not None: - raise Exception(f"{service.name} has terminated") - try: - with urllib.request.urlopen(f"http://localhost:{port}/i/status") as resp: - if resp.status in [200, 204]: break - except urllib.error.URLError: - pass - - def colour_output(): +def check_status(sub, port, service): + while True: + sub.poll() + if sub.returncode is not None: + raise Exception(f"{service.name} has terminated") try: - for line in sub.stdout: - print(service.color, end='') - print(f"[{service.name}] ", end='') - print(line, end='') - print(Colors.RESET, end='') - finally: - sub.terminate() - sub.wait() - - t = Thread(target=colour_output) - t.start() - return t + with urllib.request.urlopen(f"http://localhost:{port}/i/status") as resp: + if resp.status in [200, 204]: break + except urllib.error.URLError: + pass + +def colour_output(sub, service): + try: + for line in sub.stdout: + logging.info(f"{service.color}[{service.name}] {line.rstrip()}{Colors.RESET}") + finally: + sub.terminate() + sub.wait() + def find_root(base): # find git repository @@ -102,6 +121,8 @@ def find_root(base): return root if __name__ == '__main__': + logging.basicConfig(encoding='utf-8', level=logging.INFO, + format='%(message)s') ROOT = find_root(os.getcwd()) if ROOT is None: error("This script needs to be run within the wire-server direnv") @@ -121,6 +142,7 @@ if __name__ == '__main__': federator = Service("federator", Colors.BLUE, "federatorInternal", check_status=False) stern = Service("stern", Colors.YELLOW) + nginz = Nginz(Colors.PURPLEISH) service_map = { brig: 8082, @@ -129,33 +151,18 @@ if __name__ == '__main__': cannon: 8083, cargohold: 8084, spar: 8088, - federator: 8097 + federator: 8097, + nginz: 8080 } try: - # TODO: start services in parallel + threads = [] for service in service_map.keys(): - spawn(service, service_map, environment) - - - # procs[proc.stdout] = { - # 'process': proc, - # 'service': service - # } - - # while True: - # (ready, _, _) = select.select([proc['process'].stdout - # for proc in procs.values()], [], []) - # for f in ready: - # data = f.read().decode('utf-8') - # col = procs[f]['service'].color - # print(col, end='') - # print(data, end='') - # print(Colors.RESET, end='') - # sys.stdout.flush() + t = Thread(target=lambda: + service.spawn(service_map, environment)) + t.start() + threads.append(t) + + for thread in threads: thread.join() except KeyboardInterrupt: pass - # finally: - # for proc in procs.values(): - # proc['process'].terminate() - # proc['process'].wait() From ff175152f8ead1cb8447599186243897eef5fd5f Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Fri, 3 Feb 2023 11:34:47 +0100 Subject: [PATCH 03/29] Check prerequisites --- services/run-services | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/services/run-services b/services/run-services index f22aa13065..f46111f0dd 100755 --- a/services/run-services +++ b/services/run-services @@ -5,6 +5,7 @@ import os import select import signal import shutil +import socket import subprocess import yaml import urllib.request @@ -19,6 +20,7 @@ class Colors: BLUE = "\x1b[38;5;6m" PURPLEISH = "\x1b[38;5;13m" ORANGE = "\x1b[38;5;3m" + RED = "\x1b[38;5;1m" RESET = "\x1b[0m" @dataclass(frozen=True) @@ -62,12 +64,18 @@ class Service: sub = subprocess.Popen([self.path(), "-c", config_file], encoding='utf-8', cwd=os.path.join(ROOT, "services", self.name), - env=environment, stdout=subprocess.PIPE) + env=environment, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) if self.check_status: check_status(sub, service_map[self], self) colour_output(sub, self) + def check_exists(self): + if not os.path.exists(self.path()): + raise Exception(f"{self.name} not found") + @dataclass(frozen=True) class Nginz: color: str @@ -84,12 +92,30 @@ class Nginz: os.path.join(cwd, "conf", "nginz","nginx.conf"), "-g", "daemon off;"], encoding='utf-8', cwd=cwd, env=environment, - stdout=subprocess.PIPE) + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) check_status(sub, service_map[self], self) colour_output(sub, self) -def check_prerequisites(): - pass + def check_exists(self): + if shutil.which("nginx") is None: + raise Exception("nginx not found") + +def check_prerequisites(service_map): + try: + for port in (9042, 9200, 6379): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.connect(("127.0.0.1", port)) + except Exception as e: + logging.error(f"{Colors.RED}Databases not up. Try running 'deploy/dockerephemeral/run.sh'. {Colors.RESET}") + sys.exit(1) + + try: + for service in service_map: + service.check_exists() + except Exception as e: + logging.error(Colors.RED + str(e) + Colors.RESET) + sys.exit(1) def check_status(sub, port, service): while True: @@ -155,6 +181,8 @@ if __name__ == '__main__': nginz: 8080 } + check_prerequisites(service_map) + try: threads = [] for service in service_map.keys(): From 74c9484a51547e4cfc70a64e6461983d5926250a Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Fri, 3 Feb 2023 12:14:52 +0100 Subject: [PATCH 04/29] Wait for check_status --- services/run-services | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/services/run-services b/services/run-services index f46111f0dd..3157fd014d 100755 --- a/services/run-services +++ b/services/run-services @@ -11,9 +11,30 @@ import yaml import urllib.request import urllib.error import sys -from threading import Thread +import threading import logging +class WaitGroup: + def __init__(self): + self.count = 0 + self.cv = threading.Condition() + + def add(self): + with self.cv: + self.count += 1 + + def remove(self): + with self.cv: + self.count -= 1 + if self.count == 0: + self.cv.notify_all() + + def wait(self): + self.cv.acquire() + while self.count > 0: + self.cv.wait() + self.cv.release() + class Colors: GREEN = "\x1b[38;5;10m" YELLOW = "\x1b[38;5;11m" @@ -59,7 +80,7 @@ class Service: return f"/proc/{os.getpid()}/fd/{r}" - def spawn(self, service_map, environment): + def spawn(self, service_map, environment, group): config_file = self.modified_config_file(service_map) sub = subprocess.Popen([self.path(), "-c", config_file], encoding='utf-8', @@ -67,9 +88,9 @@ class Service: env=environment, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - if self.check_status: check_status(sub, service_map[self], self) + group.remove() colour_output(sub, self) def check_exists(self): @@ -86,7 +107,7 @@ class Nginz: @property def internal_name(self): return self.name - def spawn(self, service_map, environment): + def spawn(self, service_map, environment, group): cwd = os.path.join(ROOT, "services", "nginz", "integration-test") sub = subprocess.Popen([shutil.which("nginx"), "-p", cwd, "-c", os.path.join(cwd, "conf", "nginz","nginx.conf"), @@ -95,6 +116,7 @@ class Nginz: stdout=subprocess.PIPE, stderr=subprocess.STDOUT) check_status(sub, service_map[self], self) + group.remove() colour_output(sub, self) def check_exists(self): @@ -183,14 +205,19 @@ if __name__ == '__main__': check_prerequisites(service_map) + group = WaitGroup() try: threads = [] for service in service_map.keys(): - t = Thread(target=lambda: - service.spawn(service_map, environment)) + group.add() + t = threading.Thread(target=lambda: + service.spawn(service_map, environment, group)) t.start() threads.append(t) + group.wait() + print("XXXXX") + for thread in threads: thread.join() except KeyboardInterrupt: pass From c4f36864c9d4d9e79d3829fd1684cc8224221ccb Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Fri, 3 Feb 2023 12:40:59 +0100 Subject: [PATCH 05/29] Start services synchronously --- services/run-services | 108 +++++++++++++++----------------- services/start-services-only.sh | 2 +- 2 files changed, 52 insertions(+), 58 deletions(-) diff --git a/services/run-services b/services/run-services index 3157fd014d..01d2d5ffdd 100755 --- a/services/run-services +++ b/services/run-services @@ -11,30 +11,10 @@ import yaml import urllib.request import urllib.error import sys +import time import threading import logging -class WaitGroup: - def __init__(self): - self.count = 0 - self.cv = threading.Condition() - - def add(self): - with self.cv: - self.count += 1 - - def remove(self): - with self.cv: - self.count -= 1 - if self.count == 0: - self.cv.notify_all() - - def wait(self): - self.cv.acquire() - while self.count > 0: - self.cv.wait() - self.cv.release() - class Colors: GREEN = "\x1b[38;5;10m" YELLOW = "\x1b[38;5;11m" @@ -66,6 +46,12 @@ class Service: self.name + ".integration.yaml") def modified_config_file(self, service_map): + """Overwrite port configuration on this service using the provided + service_map. + + This works by creating an unnamed pipe, writing the modified config + file to it, and returning a path to the read end of the pipe (in + /proc).""" (r, w) = os.pipe() with open(self.config_file()) as f: @@ -80,18 +66,14 @@ class Service: return f"/proc/{os.getpid()}/fd/{r}" - def spawn(self, service_map, environment, group): + def spawn(self, service_map, environment): config_file = self.modified_config_file(service_map) - sub = subprocess.Popen([self.path(), "-c", config_file], + return subprocess.Popen([self.path(), "-c", config_file], encoding='utf-8', cwd=os.path.join(ROOT, "services", self.name), env=environment, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - if self.check_status: - check_status(sub, service_map[self], self) - group.remove() - colour_output(sub, self) def check_exists(self): if not os.path.exists(self.path()): @@ -107,17 +89,17 @@ class Nginz: @property def internal_name(self): return self.name - def spawn(self, service_map, environment, group): + @property + def check_status(self): return True + + def spawn(self, service_map, environment): cwd = os.path.join(ROOT, "services", "nginz", "integration-test") - sub = subprocess.Popen([shutil.which("nginx"), "-p", cwd, "-c", + return subprocess.Popen([shutil.which("nginx"), "-p", cwd, "-c", os.path.join(cwd, "conf", "nginz","nginx.conf"), "-g", "daemon off;"], - encoding='utf-8', cwd=cwd, env=environment, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - check_status(sub, service_map[self], self) - group.remove() - colour_output(sub, self) + encoding='utf-8', cwd=cwd, env=environment, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) def check_exists(self): if shutil.which("nginx") is None: @@ -140,17 +122,18 @@ def check_prerequisites(service_map): sys.exit(1) def check_status(sub, port, service): - while True: - sub.poll() - if sub.returncode is not None: - raise Exception(f"{service.name} has terminated") - try: - with urllib.request.urlopen(f"http://localhost:{port}/i/status") as resp: - if resp.status in [200, 204]: break - except urllib.error.URLError: - pass - -def colour_output(sub, service): + sub.poll() + if sub.returncode is not None: + raise Exception(f"{service.name} has terminated") + if not service.check_status: + return True + try: + with urllib.request.urlopen(f"http://localhost:{port}/i/status") as resp: + return resp.status in [200, 204] + except urllib.error.URLError: + return False + +def color_output(sub, service): try: for line in sub.stdout: logging.info(f"{service.color}[{service.name}] {line.rstrip()}{Colors.RESET}") @@ -205,19 +188,30 @@ if __name__ == '__main__': check_prerequisites(service_map) - group = WaitGroup() try: - threads = [] + threads = {} for service in service_map.keys(): - group.add() - t = threading.Thread(target=lambda: - service.spawn(service_map, environment, group)) + sub = service.spawn(service_map, environment) + t = threading.Thread(target=lambda: color_output(sub, service)) t.start() - threads.append(t) - - group.wait() - print("XXXXX") - - for thread in threads: thread.join() + threads[service] = { + 'thread': t, + 'process': sub + } + + # check status + services = set(threads) + while services: + service = next(iter(services)) + if check_status(threads[service]['process'], + service_map[service], + service): + services.remove(service) + + # main script + subprocess.run(sys.argv[1:], check=True) + + for service in threads: threads[service]['process'].terminate() + for service in threads: threads[service]['thread'].join() except KeyboardInterrupt: pass diff --git a/services/start-services-only.sh b/services/start-services-only.sh index 9d7ea41c09..c445f8f21c 100755 --- a/services/start-services-only.sh +++ b/services/start-services-only.sh @@ -8,4 +8,4 @@ set -eo pipefail SERVICES_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # call integration.sh, show a message, then sleep (instead of executing a test executable) -"$SERVICES_DIR/integration.sh" bash -c 'printf "(This will hang, Control+C to close.)\nNow you can manually curl them or start an integration test executable manually with e.g. \n(first cd to a service dir for correct working directory)\n cd services/brig && ../../dist/brig-integration -s brig.integration.yaml -i ../integration.yaml\n" && sleep 1000000' +"/usr/bin/python3" "$SERVICES_DIR/run-services" bash -c 'printf "(This will hang, Control+C to close.)\nNow you can manually curl them or start an integration test executable manually with e.g. \n(first cd to a service dir for correct working directory)\n cd services/brig && ../../dist/brig-integration -s brig.integration.yaml -i ../integration.yaml\n" && sleep 1000000' From 8ee426225781fc76a8f69e50488bbdc4529eb950 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Fri, 3 Feb 2023 14:08:03 +0100 Subject: [PATCH 06/29] Send SIGTERM again if processes don't terminate --- hack/bin/cabal-run-integration.sh | 2 +- services/run-services | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/hack/bin/cabal-run-integration.sh b/hack/bin/cabal-run-integration.sh index 285744f0e1..f92a8c5284 100755 --- a/hack/bin/cabal-run-integration.sh +++ b/hack/bin/cabal-run-integration.sh @@ -46,7 +46,7 @@ run_integration_tests() { service_dir="$TOP_LEVEL/services/$package" cd "$service_dir" - "$TOP_LEVEL/services/integration.sh" \ + "/usr/bin/python3" "$TOP_LEVEL/services/run-services" \ "$TOP_LEVEL/dist/$package-integration" \ -s "$service_dir/$package.integration.yaml" \ -i "$TOP_LEVEL/services/integration.yaml" \ diff --git a/services/run-services b/services/run-services index 01d2d5ffdd..ecaac69976 100755 --- a/services/run-services +++ b/services/run-services @@ -141,7 +141,6 @@ def color_output(sub, service): sub.terminate() sub.wait() - def find_root(base): # find git repository root = os.path.realpath(base) @@ -209,9 +208,19 @@ if __name__ == '__main__': services.remove(service) # main script - subprocess.run(sys.argv[1:], check=True) - - for service in threads: threads[service]['process'].terminate() - for service in threads: threads[service]['thread'].join() + ret = subprocess.run(sys.argv[1:]) + sys.exit(ret.returncode) except KeyboardInterrupt: pass + finally: + for service in threads: + threads[service]['process'].terminate() + + for service in threads: + t = threads[service]['thread'] + t.join(timeout=0.1) + # some services don't react promptly to SIGTERM, so we give them a + # nudge if they don't terminate within a few milliseconds + if t.is_alive(): + threads[service]['process'].terminate() + t.join() From 270136efdc672bc09ab366dd2f3c49afef267c79 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Fri, 3 Feb 2023 14:19:51 +0100 Subject: [PATCH 07/29] Small refactoring --- services/run-services | 89 +++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/services/run-services b/services/run-services index ecaac69976..1037f833ee 100755 --- a/services/run-services +++ b/services/run-services @@ -105,7 +105,13 @@ class Nginz: if shutil.which("nginx") is None: raise Exception("nginx not found") -def check_prerequisites(service_map): +@dataclass(frozen=True) +class Instance: + service: Service + thread: threading.Thread + process: subprocess.Popen + +def check_prerequisites(services): try: for port in (9042, 9200, 6379): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: @@ -115,7 +121,7 @@ def check_prerequisites(service_map): sys.exit(1) try: - for service in service_map: + for service in services: service.check_exists() except Exception as e: logging.error(Colors.RED + str(e) + Colors.RESET) @@ -150,6 +156,17 @@ def find_root(base): root = p return root +BRIG = Service("brig", Colors.GREEN) +GALLEY = Service("galley", Colors.YELLOW) +GUNDECK = Service("gundeck", Colors.BLUE) +CANNON = Service("cannon", Colors.ORANGE) +CARGOHOLD = Service("cargohold", Colors.PURPLEISH) +SPAR = Service("spar", Colors.ORANGE) +FEDERATOR = Service("federator", Colors.BLUE, + "federatorInternal", check_status=False) +STERN = Service("stern", Colors.YELLOW) +NGINZ = Nginz(Colors.PURPLEISH) + if __name__ == '__main__': logging.basicConfig(encoding='utf-8', level=logging.INFO, format='%(message)s') @@ -163,49 +180,38 @@ if __name__ == '__main__': 'AWS_SECRET_ACCESS_KEY': "dummysecret" } - brig = Service("brig", Colors.GREEN) - galley = Service("galley", Colors.YELLOW) - gundeck = Service("gundeck", Colors.BLUE) - cannon = Service("cannon", Colors.ORANGE) - cargohold = Service("cargohold", Colors.PURPLEISH) - spar = Service("spar", Colors.ORANGE) - federator = Service("federator", Colors.BLUE, - "federatorInternal", check_status=False) - stern = Service("stern", Colors.YELLOW) - nginz = Nginz(Colors.PURPLEISH) - + instances = { + CANNON: 2 + } service_map = { - brig: 8082, - galley: 8085, - gundeck: 8086, - cannon: 8083, - cargohold: 8084, - spar: 8088, - federator: 8097, - nginz: 8080 + BRIG: 8082, + GALLEY: 8085, + GUNDECK: 8086, + CANNON: 8083, + CARGOHOLD: 8084, + SPAR: 8088, + FEDERATOR: 8097, + NGINZ: 8080 } - check_prerequisites(service_map) + instances = set() try: - threads = {} for service in service_map.keys(): sub = service.spawn(service_map, environment) t = threading.Thread(target=lambda: color_output(sub, service)) t.start() - threads[service] = { - 'thread': t, - 'process': sub - } + instance = Instance(service, t, sub) + instances.add(instance) # check status - services = set(threads) - while services: - service = next(iter(services)) - if check_status(threads[service]['process'], - service_map[service], - service): - services.remove(service) + to_be_checked = set(instances) + while to_be_checked: + instance = next(iter(to_be_checked)) + if check_status(instance.process, + service_map[instance.service], + instance.service): + to_be_checked.remove(instance) # main script ret = subprocess.run(sys.argv[1:]) @@ -213,14 +219,13 @@ if __name__ == '__main__': except KeyboardInterrupt: pass finally: - for service in threads: - threads[service]['process'].terminate() + for instance in instances: + instance.process.terminate() - for service in threads: - t = threads[service]['thread'] - t.join(timeout=0.1) + for instance in instances: + instance.thread.join(timeout=0.1) # some services don't react promptly to SIGTERM, so we give them a # nudge if they don't terminate within a few milliseconds - if t.is_alive(): - threads[service]['process'].terminate() - t.join() + if instance.thread.is_alive(): + instance.process.terminate() + instance.thread.join() From 41646c159a419891a1598af6d4f5fbcc4d29d6bd Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Fri, 3 Feb 2023 14:48:08 +0100 Subject: [PATCH 08/29] Support multiple instances of the same service --- hack/bin/cabal-run-integration.sh | 1 + .../conf/nginz/integration.b.conf | 19 ++ .../conf/nginz/integration.default.conf | 19 ++ .../integration-test/conf/nginz/nginx.conf | 20 +- services/run-services | 233 +++++++++++++----- 5 files changed, 205 insertions(+), 87 deletions(-) create mode 100644 services/nginz/integration-test/conf/nginz/integration.b.conf create mode 100644 services/nginz/integration-test/conf/nginz/integration.default.conf diff --git a/hack/bin/cabal-run-integration.sh b/hack/bin/cabal-run-integration.sh index f92a8c5284..758855e4b6 100755 --- a/hack/bin/cabal-run-integration.sh +++ b/hack/bin/cabal-run-integration.sh @@ -46,6 +46,7 @@ run_integration_tests() { service_dir="$TOP_LEVEL/services/$package" cd "$service_dir" + # "$TOP_LEVEL/services/integration.sh" \ "/usr/bin/python3" "$TOP_LEVEL/services/run-services" \ "$TOP_LEVEL/dist/$package-integration" \ -s "$service_dir/$package.integration.yaml" \ diff --git a/services/nginz/integration-test/conf/nginz/integration.b.conf b/services/nginz/integration-test/conf/nginz/integration.b.conf new file mode 100644 index 0000000000..36004d5126 --- /dev/null +++ b/services/nginz/integration-test/conf/nginz/integration.b.conf @@ -0,0 +1,19 @@ +# plain TCP/http listening for integration tests only. +listen 8280; +listen 8281; + +# for nginx-without-tls, we need to use a separate port for http2 traffic, +# as nginx cannot handle unencrypted http1 and http2 trafic on the same +# port. +# This port is only used for trying out nginx http2 forwarding without TLS locally and should not +# be ported to any production nginz config. +listen 8290 http2; + +######## TLS/SSL block start ############## +# +# Most integration tests simply use the http ports 8080 and 8081 +# But to also test tls forwarding, this port can be used. +# This applies only locally, as for kubernetes (helm chart) based deployments, +# TLS is terminated at the ingress level, not at nginz level +listen 8643 ssl http2; +listen [::]:8643 ssl http2; diff --git a/services/nginz/integration-test/conf/nginz/integration.default.conf b/services/nginz/integration-test/conf/nginz/integration.default.conf new file mode 100644 index 0000000000..baae352c92 --- /dev/null +++ b/services/nginz/integration-test/conf/nginz/integration.default.conf @@ -0,0 +1,19 @@ +# plain TCP/http listening for integration tests only. +listen 8080; +listen 8081; + +# for nginx-without-tls, we need to use a separate port for http2 traffic, +# as nginx cannot handle unencrypted http1 and http2 trafic on the same +# port. +# This port is only used for trying out nginx http2 forwarding without TLS locally and should not +# be ported to any production nginz config. +listen 8090 http2; + +######## TLS/SSL block start ############## +# +# Most integration tests simply use the http ports 8080 and 8081 +# But to also test tls forwarding, this port can be used. +# This applies only locally, as for kubernetes (helm chart) based deployments, +# TLS is terminated at the ingress level, not at nginz level +listen 8443 ssl http2; +listen [::]:8443 ssl http2; diff --git a/services/nginz/integration-test/conf/nginz/nginx.conf b/services/nginz/integration-test/conf/nginz/nginx.conf index e1d5f3fb6c..1e94976798 100644 --- a/services/nginz/integration-test/conf/nginz/nginx.conf +++ b/services/nginz/integration-test/conf/nginz/nginx.conf @@ -106,25 +106,7 @@ http { # server { - # plain TCP/http listening for integration tests only. - listen 8080; - listen 8081; - - # for nginx-without-tls, we need to use a separate port for http2 traffic, - # as nginx cannot handle unencrypted http1 and http2 trafic on the same - # port. - # This port is only used for trying out nginx http2 forwarding without TLS locally and should not - # be ported to any production nginz config. - listen 8090 http2; - - ######## TLS/SSL block start ############## - # - # Most integration tests simply use the http ports 8080 and 8081 - # But to also test tls forwarding, this port can be used. - # This applies only locally, as for kubernetes (helm chart) based deployments, - # TLS is terminated at the ingress level, not at nginz level - listen 8443 ssl http2; - listen [::]:8443 ssl http2; + include integration.default.conf; # self-signed certificates generated using wire-server/hack/bin/selfsigned.sh ssl_certificate integration-leaf.pem; diff --git a/services/run-services b/services/run-services index 1037f833ee..2d4f08ee15 100755 --- a/services/run-services +++ b/services/run-services @@ -2,6 +2,7 @@ from dataclasses import dataclass import os +import re import select import signal import shutil @@ -11,6 +12,7 @@ import yaml import urllib.request import urllib.error import sys +import tempfile import time import threading import logging @@ -45,29 +47,9 @@ class Service: return os.path.join(ROOT, "services", self.name, self.name + ".integration.yaml") - def modified_config_file(self, service_map): - """Overwrite port configuration on this service using the provided - service_map. - - This works by creating an unnamed pipe, writing the modified config - file to it, and returning a path to the read end of the pipe (in - /proc).""" - (r, w) = os.pipe() - - with open(self.config_file()) as f: - data = yaml.safe_load(f) - - for service, port in service_map.items(): - if service.internal_name in data: - data[service.internal_name]['port'] = port - - out = os.fdopen(w, 'w') - yaml.dump(data, out) - return f"/proc/{os.getpid()}/fd/{r}" - - def spawn(self, service_map, environment): - config_file = self.modified_config_file(service_map) + def spawn(self, config_file, environment): + print('spawn', self.name, config_file) return subprocess.Popen([self.path(), "-c", config_file], encoding='utf-8', cwd=os.path.join(ROOT, "services", self.name), @@ -92,10 +74,15 @@ class Nginz: @property def check_status(self): return True - def spawn(self, service_map, environment): + def config_file(self): + return os.path.join(ROOT, "services", "nginz", "integration-test", + "conf", "nginz","nginx.conf") + + def spawn(self, config_file, environment): cwd = os.path.join(ROOT, "services", "nginz", "integration-test") + print(cwd) return subprocess.Popen([shutil.which("nginx"), "-p", cwd, "-c", - os.path.join(cwd, "conf", "nginz","nginx.conf"), + config_file, "-g", "daemon off;"], encoding='utf-8', cwd=cwd, env=environment, stdout=subprocess.PIPE, @@ -108,8 +95,97 @@ class Nginz: @dataclass(frozen=True) class Instance: service: Service - thread: threading.Thread - process: subprocess.Popen + port: int + thread: threading.Thread = None + process: subprocess.Popen = None + + def check_status(self): + self.process.poll() + if self.process.returncode is not None: + raise Exception(f"{self.service.name} has terminated") + if not self.service.check_status: + return True + try: + with urllib.request.urlopen(f"http://localhost:{self.port}/i/status") as resp: + return resp.status in [200, 204] + except urllib.error.URLError: + return False + + def spawn(self, service_map, environment): + config_file = self.modified_config_file(service_map) + sub = self.service.spawn(config_file, environment) + t = threading.Thread(target=lambda: color_output(sub, self.service)) + t.start() + return Instance(self.service, self.port, t, sub) + + def modified_config_file(self, service_map): + """Overwrite port configuration on this service using the provided + service_map. + + This works by creating an unnamed pipe, writing the modified config + file to it, and returning a path to the read end of the pipe (in + /proc).""" + + def get_port(service): + if service is self.service: + return self.port + else: + return service_map[service] + + with open(self.service.config_file()) as f: + data = yaml.safe_load(f) + + for service in service_map: + if service.internal_name in data: + data[service.internal_name]['port'] = get_port(service) + self.set_own_port(data) + + # write modified config file to pipe + return make_pipe(yaml.dump(data).encode('utf-8')) + + def set_own_port(self, data): + # spar's own port is in a different place + if 'saml' in data: + data['saml']['spPort'] = self.port + +class FederatorInstance(Instance): + def __init__(self, internal_port, external_port): + self.external_port = external_port + super().__init__(FEDERATOR, internal_port) + + def set_own_port(self, data): + # set external port only, as the internal one is part of the service + # map and is set by the general config logic + data['federatorExternal']['port'] = self.external_port + +class NginzInstance(Instance): + def __init__(self, local_port, http2_port, ssl_port): + self.http2_port = http2_port + self.ssl_port = ssl_port + self.temp_files = [] + super().__init__(NGINZ, local_port) + + def modified_config_file(self, service_map): + override = f""" + listen {self.port}; + listen {self.http2_port} http2; + listen {self.ssl_port} ssl http2; + listen [::]:{self.ssl_port} ssl http2;""" + override_file = tempfile.NamedTemporaryFile() + self.temp_files.append(override_file) + override_file.write(override.encode('utf-8')) + override_file.flush() + + config = open(self.service.config_file()).read() + config = re.sub(r'^include integration.default.conf;$', + f"include {override_file}", + config, re.MULTILINE) + config_file = tempfile.NamedTemporaryFile() + self.temp_files.append(config_file) + config_file.write(config.encode('utf-8')) + config_file.flush() + + return config_file.name def check_prerequisites(services): try: @@ -127,18 +203,6 @@ def check_prerequisites(services): logging.error(Colors.RED + str(e) + Colors.RESET) sys.exit(1) -def check_status(sub, port, service): - sub.poll() - if sub.returncode is not None: - raise Exception(f"{service.name} has terminated") - if not service.check_status: - return True - try: - with urllib.request.urlopen(f"http://localhost:{port}/i/status") as resp: - return resp.status in [200, 204] - except urllib.error.URLError: - return False - def color_output(sub, service): try: for line in sub.stdout: @@ -156,6 +220,12 @@ def find_root(base): root = p return root +def make_pipe(data): + (r, w) = os.pipe() + os.write(w, data) + os.close(w) + return f"/proc/{os.getpid()}/fd/{r}" + BRIG = Service("brig", Colors.GREEN) GALLEY = Service("galley", Colors.YELLOW) GUNDECK = Service("gundeck", Colors.BLUE) @@ -167,6 +237,28 @@ FEDERATOR = Service("federator", Colors.BLUE, STERN = Service("stern", Colors.YELLOW) NGINZ = Nginz(Colors.PURPLEISH) +def start_backend(services): + # build a service map by choosing an arbitrary instance of each service + service_map = dict((s.service, s.port) for s in services) + + instances = set() + for blueprint in services: + instances.add(blueprint.spawn(service_map, environment)) + + # check status + # TODO: global timeout + to_be_checked = set(instances) + while to_be_checked: + instance = next(iter(to_be_checked)) + print("checking status", instance.service.name) + if instance.check_status(): + to_be_checked.remove(instance) + else: + time.sleep(0.1) + + print("---- backend started ----") + return instances + if __name__ == '__main__': logging.basicConfig(encoding='utf-8', level=logging.INFO, format='%(message)s') @@ -180,38 +272,39 @@ if __name__ == '__main__': 'AWS_SECRET_ACCESS_KEY': "dummysecret" } - instances = { - CANNON: 2 - } - service_map = { - BRIG: 8082, - GALLEY: 8085, - GUNDECK: 8086, - CANNON: 8083, - CARGOHOLD: 8084, - SPAR: 8088, - FEDERATOR: 8097, - NGINZ: 8080 - } - check_prerequisites(service_map) + # backend_a = [ + # Instance(BRIG, 8282), + # Instance(GALLEY, 8285), + # Instance(GUNDECK, 8286), + # Instance(CANNON, 8283), + # Instance(CANNON, 8383), + # Instance(CARGOHOLD, 8284), + # Instance(SPAR, 8288), + # FederatorInstance(8297, 8298), + # NginzInstance(8080) + # ] + + backend_a = [ + Instance(BRIG, 8082), + Instance(GALLEY, 8085), + Instance(GUNDECK, 8086), + Instance(CANNON, 8083), + Instance(CANNON, 8183), + Instance(CARGOHOLD, 8084), + Instance(SPAR, 8088), + FederatorInstance(8097, 8098), + NginzInstance( + local_port=8080, + http2_port=8090, + ssl_port=8443) + ] + + check_prerequisites(set(s.service for s in backend_a)) - instances = set() try: - for service in service_map.keys(): - sub = service.spawn(service_map, environment) - t = threading.Thread(target=lambda: color_output(sub, service)) - t.start() - instance = Instance(service, t, sub) - instances.add(instance) - - # check status - to_be_checked = set(instances) - while to_be_checked: - instance = next(iter(to_be_checked)) - if check_status(instance.process, - service_map[instance.service], - instance.service): - to_be_checked.remove(instance) + instances = set() + instances |= start_backend(backend_a) + # instances |= start_backend(backend_b) # main script ret = subprocess.run(sys.argv[1:]) @@ -228,4 +321,8 @@ if __name__ == '__main__': # nudge if they don't terminate within a few milliseconds if instance.thread.is_alive(): instance.process.terminate() + instance.thread.join(timeout=0.1) + if instance.thread.is_alive(): + print("force-killing", instance.service.name) + instance.process.send_signal(signal.SIGKILl) instance.thread.join() From 61d1f50e46494edfa605aa0d425e78ea2567df7a Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Mon, 6 Feb 2023 10:24:42 +0100 Subject: [PATCH 09/29] Better handling of spawn failures --- services/run-services | 100 +++++++++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 36 deletions(-) diff --git a/services/run-services b/services/run-services index 2d4f08ee15..5dd5d3071e 100755 --- a/services/run-services +++ b/services/run-services @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from dataclasses import dataclass +from dataclasses import dataclass, replace import os import re import select @@ -17,6 +17,10 @@ import time import threading import logging +@dataclass +class SpawnFailException(Exception): + failed_instances: object + class Colors: GREEN = "\x1b[38;5;10m" YELLOW = "\x1b[38;5;11m" @@ -49,7 +53,6 @@ class Service: def spawn(self, config_file, environment): - print('spawn', self.name, config_file) return subprocess.Popen([self.path(), "-c", config_file], encoding='utf-8', cwd=os.path.join(ROOT, "services", self.name), @@ -80,7 +83,6 @@ class Nginz: def spawn(self, config_file, environment): cwd = os.path.join(ROOT, "services", "nginz", "integration-test") - print(cwd) return subprocess.Popen([shutil.which("nginx"), "-p", cwd, "-c", config_file, "-g", "daemon off;"], @@ -98,6 +100,7 @@ class Instance: port: int thread: threading.Thread = None process: subprocess.Popen = None + exception: Exception = None def check_status(self): self.process.poll() @@ -112,11 +115,14 @@ class Instance: return False def spawn(self, service_map, environment): - config_file = self.modified_config_file(service_map) - sub = self.service.spawn(config_file, environment) - t = threading.Thread(target=lambda: color_output(sub, self.service)) - t.start() - return Instance(self.service, self.port, t, sub) + try: + config_file = self.modified_config_file(service_map) + sub = self.service.spawn(config_file, environment) + t = threading.Thread(target=lambda: color_output(sub, self.service)) + t.start() + return Instance(self.service, self.port, t, sub) + except Exception as e: + return Instance(self.service, self.port, exception=e) def modified_config_file(self, service_map): """Overwrite port configuration on this service using the provided @@ -226,16 +232,23 @@ def make_pipe(data): os.close(w) return f"/proc/{os.getpid()}/fd/{r}" -BRIG = Service("brig", Colors.GREEN) -GALLEY = Service("galley", Colors.YELLOW) -GUNDECK = Service("gundeck", Colors.BLUE) -CANNON = Service("cannon", Colors.ORANGE) -CARGOHOLD = Service("cargohold", Colors.PURPLEISH) -SPAR = Service("spar", Colors.ORANGE) -FEDERATOR = Service("federator", Colors.BLUE, - "federatorInternal", check_status=False) -STERN = Service("stern", Colors.YELLOW) -NGINZ = Nginz(Colors.PURPLEISH) +def cleanup_instances(instances): + for instance in instances: + if instance.process is None: continue + instance.process.terminate() + + for instance in instances: + if instance.thread is None: continue + instance.thread.join(timeout=0.1) + # some services don't react promptly to SIGTERM, so we give them a + # nudge if they don't terminate within a few milliseconds + if instance.thread.is_alive(): + instance.process.terminate() + instance.thread.join(timeout=0.1) + if instance.thread.is_alive(): + print("force-killing", instance.service.name) + instance.process.send_signal(signal.SIGKILl) + instance.thread.join() def start_backend(services): # build a service map by choosing an arbitrary instance of each service @@ -245,20 +258,44 @@ def start_backend(services): for blueprint in services: instances.add(blueprint.spawn(service_map, environment)) + failed_instances = [instance for instance in instances + if instance.exception is not None] + to_be_checked = set(instance for instance in instances + if instance.exception is None) + # check status # TODO: global timeout - to_be_checked = set(instances) while to_be_checked: instance = next(iter(to_be_checked)) - print("checking status", instance.service.name) - if instance.check_status(): + try: + done = instance.check_status() + except Exception as e: + failed_instances.append(replace(instance, exception=e)) + done = True + + if done: to_be_checked.remove(instance) else: time.sleep(0.1) - print("---- backend started ----") + if failed_instances: + + cleanup_instances(instances) + raise SpawnFailException(failed_instances) + return instances +BRIG = Service("brig", Colors.GREEN) +GALLEY = Service("galley", Colors.YELLOW) +GUNDECK = Service("gundeck", Colors.BLUE) +CANNON = Service("cannon", Colors.ORANGE) +CARGOHOLD = Service("cargohold", Colors.PURPLEISH) +SPAR = Service("spar", Colors.ORANGE) +FEDERATOR = Service("federator", Colors.BLUE, + "federatorInternal", check_status=False) +STERN = Service("stern", Colors.YELLOW) +NGINZ = Nginz(Colors.PURPLEISH) + if __name__ == '__main__': logging.basicConfig(encoding='utf-8', level=logging.INFO, format='%(message)s') @@ -311,18 +348,9 @@ if __name__ == '__main__': sys.exit(ret.returncode) except KeyboardInterrupt: pass + except SpawnFailException as e: + print(f"{Colors.RED}The following services failed to start:{Colors.RESET}") + for instance in e.failed_instances: + print(f"{instance.service.name} at port {instance.port}") finally: - for instance in instances: - instance.process.terminate() - - for instance in instances: - instance.thread.join(timeout=0.1) - # some services don't react promptly to SIGTERM, so we give them a - # nudge if they don't terminate within a few milliseconds - if instance.thread.is_alive(): - instance.process.terminate() - instance.thread.join(timeout=0.1) - if instance.thread.is_alive(): - print("force-killing", instance.service.name) - instance.process.send_signal(signal.SIGKILl) - instance.thread.join() + cleanup_instances(instances) From f01f4066f8098ac90b4cf09ddad50fee36884a53 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Mon, 6 Feb 2023 10:37:14 +0100 Subject: [PATCH 10/29] Start nginx by copying the whole conf directory --- .../conf/nginz/integration.b.conf | 19 --------- ...egration.default.conf => integration.conf} | 0 .../integration-test/conf/nginz/nginx.conf | 2 +- services/run-services | 40 +++++++++---------- 4 files changed, 19 insertions(+), 42 deletions(-) delete mode 100644 services/nginz/integration-test/conf/nginz/integration.b.conf rename services/nginz/integration-test/conf/nginz/{integration.default.conf => integration.conf} (100%) diff --git a/services/nginz/integration-test/conf/nginz/integration.b.conf b/services/nginz/integration-test/conf/nginz/integration.b.conf deleted file mode 100644 index 36004d5126..0000000000 --- a/services/nginz/integration-test/conf/nginz/integration.b.conf +++ /dev/null @@ -1,19 +0,0 @@ -# plain TCP/http listening for integration tests only. -listen 8280; -listen 8281; - -# for nginx-without-tls, we need to use a separate port for http2 traffic, -# as nginx cannot handle unencrypted http1 and http2 trafic on the same -# port. -# This port is only used for trying out nginx http2 forwarding without TLS locally and should not -# be ported to any production nginz config. -listen 8290 http2; - -######## TLS/SSL block start ############## -# -# Most integration tests simply use the http ports 8080 and 8081 -# But to also test tls forwarding, this port can be used. -# This applies only locally, as for kubernetes (helm chart) based deployments, -# TLS is terminated at the ingress level, not at nginz level -listen 8643 ssl http2; -listen [::]:8643 ssl http2; diff --git a/services/nginz/integration-test/conf/nginz/integration.default.conf b/services/nginz/integration-test/conf/nginz/integration.conf similarity index 100% rename from services/nginz/integration-test/conf/nginz/integration.default.conf rename to services/nginz/integration-test/conf/nginz/integration.conf diff --git a/services/nginz/integration-test/conf/nginz/nginx.conf b/services/nginz/integration-test/conf/nginz/nginx.conf index 1e94976798..d5d3bca547 100644 --- a/services/nginz/integration-test/conf/nginz/nginx.conf +++ b/services/nginz/integration-test/conf/nginz/nginx.conf @@ -106,7 +106,7 @@ http { # server { - include integration.default.conf; + include integration.conf; # self-signed certificates generated using wire-server/hack/bin/selfsigned.sh ssl_certificate integration-leaf.pem; diff --git a/services/run-services b/services/run-services index 5dd5d3071e..265f4dd8fc 100755 --- a/services/run-services +++ b/services/run-services @@ -168,30 +168,26 @@ class NginzInstance(Instance): def __init__(self, local_port, http2_port, ssl_port): self.http2_port = http2_port self.ssl_port = ssl_port - self.temp_files = [] super().__init__(NGINZ, local_port) def modified_config_file(self, service_map): - override = f""" - listen {self.port}; - listen {self.http2_port} http2; - listen {self.ssl_port} ssl http2; - listen [::]:{self.ssl_port} ssl http2;""" - override_file = tempfile.NamedTemporaryFile() - self.temp_files.append(override_file) - override_file.write(override.encode('utf-8')) - override_file.flush() - - config = open(self.service.config_file()).read() - config = re.sub(r'^include integration.default.conf;$', - f"include {override_file}", - config, re.MULTILINE) - config_file = tempfile.NamedTemporaryFile() - self.temp_files.append(config_file) - config_file.write(config.encode('utf-8')) - config_file.flush() - - return config_file.name + # Create a whole temporary directory and copy all nginx's config files. + # This is necessary because nginx assumes local imports are relative to + # the location of the main configuration file. + self.tmpdir = tempfile.TemporaryDirectory() + shutil.copytree(os.path.dirname(self.service.config_file()), + self.tmpdir.name, + dirs_exist_ok=True) + + with open(os.path.join(self.tmpdir.name, "integration.conf"), 'w') as f: + override = f""" + listen {self.port}; + listen {self.http2_port} http2; + listen {self.ssl_port} ssl http2; + listen [::]:{self.ssl_port} ssl http2;""" + print(override, file=f) + + return os.path.join(self.tmpdir.name, "nginx.conf") def check_prerequisites(services): try: @@ -351,6 +347,6 @@ if __name__ == '__main__': except SpawnFailException as e: print(f"{Colors.RED}The following services failed to start:{Colors.RESET}") for instance in e.failed_instances: - print(f"{instance.service.name} at port {instance.port}") + print(f"{instance.service.name} at port {instance.port} ({instance.exception})") finally: cleanup_instances(instances) From 1eee317255ec41f4d19d04b48f4102278fbf323f Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Mon, 6 Feb 2023 10:45:52 +0100 Subject: [PATCH 11/29] Configure nginx's pid --- Makefile | 6 ++ .../coredns-config/db.example.com | 3 +- .../test/integration/Federation/End2end.hs | 8 +-- .../integration-test/conf/nginz/nginx.conf | 2 +- .../integration-test/conf/nginz/pid.conf | 1 + services/run-services | 62 +++++++++++++------ 6 files changed, 56 insertions(+), 26 deletions(-) create mode 100644 services/nginz/integration-test/conf/nginz/pid.conf diff --git a/Makefile b/Makefile index 986013600f..840ce6f0fd 100644 --- a/Makefile +++ b/Makefile @@ -267,9 +267,15 @@ ifeq ($(package), all) ./dist/galley-schema --keyspace galley_test --replication-factor 1 ./dist/gundeck-schema --keyspace gundeck_test --replication-factor 1 ./dist/spar-schema --keyspace spar_test --replication-factor 1 + + ./dist/brig-schema --keyspace brig_test2 --replication-factor 1 + ./dist/galley-schema --keyspace galley_test2 --replication-factor 1 + ./dist/gundeck-schema --keyspace gundeck_test2 --replication-factor 1 + ./dist/spar-schema --keyspace spar_test2 --replication-factor 1 # How this check works: https://stackoverflow.com/a/9802777 else ifeq ($(package), $(filter $(package),brig galley gundeck spar)) $(EXE_SCHEMA) --keyspace $(package)_test --replication-factor 1 + $(EXE_SCHEMA) --keyspace $(package)_test2 --replication-factor 1 else @echo No schema migrations for $(package) endif diff --git a/deploy/dockerephemeral/coredns-config/db.example.com b/deploy/dockerephemeral/coredns-config/db.example.com index 941502a432..0f33e57975 100644 --- a/deploy/dockerephemeral/coredns-config/db.example.com +++ b/deploy/dockerephemeral/coredns-config/db.example.com @@ -12,4 +12,5 @@ $ORIGIN example.com. www IN A 127.0.0.1 IN AAAA ::1 -_wire-server-federator._tcp IN SRV 0 0 443 federator.integration.example.com. +_wire-server-federator._tcp IN SRV 0 0 9443 localhost. + diff --git a/services/brig/test/integration/Federation/End2end.hs b/services/brig/test/integration/Federation/End2end.hs index a13db52313..18808516e4 100644 --- a/services/brig/test/integration/Federation/End2end.hs +++ b/services/brig/test/integration/Federation/End2end.hs @@ -183,14 +183,14 @@ testGetUsersById brig1 brig2 = do testClaimPrekeySuccess :: Brig -> Brig -> Http () testClaimPrekeySuccess brig1 brig2 = do - self <- randomUser brig1 - user <- randomUser brig2 + self <- randomUser brig2 + user <- randomUser brig1 let new = defNewClient TemporaryClientType (take 1 somePrekeys) (Imports.head someLastPrekeys) - c <- responseJsonError =<< addClient brig2 (userId user) new + c <- responseJsonError =<< addClient brig1 (userId user) new let cpk = ClientPrekey (clientId c) (Imports.head somePrekeys) let quser = userQualifiedId user get - ( brig1 + ( brig2 . zUser (userId self) . paths [ "users", diff --git a/services/nginz/integration-test/conf/nginz/nginx.conf b/services/nginz/integration-test/conf/nginz/nginx.conf index d5d3bca547..564923b263 100644 --- a/services/nginz/integration-test/conf/nginz/nginx.conf +++ b/services/nginz/integration-test/conf/nginz/nginx.conf @@ -1,6 +1,6 @@ worker_processes 4; worker_rlimit_nofile 1024; -pid /tmp/nginz.pid; +include pid.conf; # for easy overriding # nb. start up errors (eg. misconfiguration) may still end up in /$(LOG_PATH)/error.log error_log stderr warn; diff --git a/services/nginz/integration-test/conf/nginz/pid.conf b/services/nginz/integration-test/conf/nginz/pid.conf new file mode 100644 index 0000000000..e722aa5ae2 --- /dev/null +++ b/services/nginz/integration-test/conf/nginz/pid.conf @@ -0,0 +1 @@ +pid /tmp/nginz.pid; diff --git a/services/run-services b/services/run-services index 265f4dd8fc..0d0b2a939f 100755 --- a/services/run-services +++ b/services/run-services @@ -114,9 +114,9 @@ class Instance: except urllib.error.URLError: return False - def spawn(self, service_map, environment): + def spawn(self, service_map, environment, keyspace, domain): try: - config_file = self.modified_config_file(service_map) + config_file = self.modified_config_file(service_map, keyspace, domain) sub = self.service.spawn(config_file, environment) t = threading.Thread(target=lambda: color_output(sub, self.service)) t.start() @@ -124,7 +124,7 @@ class Instance: except Exception as e: return Instance(self.service, self.port, exception=e) - def modified_config_file(self, service_map): + def modified_config_file(self, service_map, keyspace, domain): """Overwrite port configuration on this service using the provided service_map. @@ -141,9 +141,21 @@ class Instance: with open(self.service.config_file()) as f: data = yaml.safe_load(f) + # set ports of other services for service in service_map: if service.internal_name in data: data[service.internal_name]['port'] = get_port(service) + + # set cassandra keyspace + if 'cassandra' in data: + data['cassandra']['keyspace'] = f"{self.service.name}_{keyspace}" + + # set federation domain + if 'optSettings' in data: + data['optSettings']['setFederationDomain'] = domain + elif 'settings' in data: + data['settings']['federationDomain'] = domain + self.set_own_port(data) # write modified config file to pipe @@ -170,7 +182,7 @@ class NginzInstance(Instance): self.ssl_port = ssl_port super().__init__(NGINZ, local_port) - def modified_config_file(self, service_map): + def modified_config_file(self, service_map, keyspace, domain): # Create a whole temporary directory and copy all nginx's config files. # This is necessary because nginx assumes local imports are relative to # the location of the main configuration file. @@ -179,6 +191,7 @@ class NginzInstance(Instance): self.tmpdir.name, dirs_exist_ok=True) + # override port configuration with open(os.path.join(self.tmpdir.name, "integration.conf"), 'w') as f: override = f""" listen {self.port}; @@ -187,6 +200,11 @@ class NginzInstance(Instance): listen [::]:{self.ssl_port} ssl http2;""" print(override, file=f) + # override pid configuration + with open(os.path.join(self.tmpdir.name, "pid.conf"), 'w') as f: + pid = os.path.join(self.tmpdir.name, "nginz.pid") + print(f"pid {pid};", file=f) + return os.path.join(self.tmpdir.name, "nginx.conf") def check_prerequisites(services): @@ -206,6 +224,7 @@ def check_prerequisites(services): sys.exit(1) def color_output(sub, service): + # TODO: add backend name to the output try: for line in sub.stdout: logging.info(f"{service.color}[{service.name}] {line.rstrip()}{Colors.RESET}") @@ -246,13 +265,13 @@ def cleanup_instances(instances): instance.process.send_signal(signal.SIGKILl) instance.thread.join() -def start_backend(services): +def start_backend(services, keyspace, domain): # build a service map by choosing an arbitrary instance of each service service_map = dict((s.service, s.port) for s in services) instances = set() for blueprint in services: - instances.add(blueprint.spawn(service_map, environment)) + instances.add(blueprint.spawn(service_map, environment, keyspace, domain)) failed_instances = [instance for instance in instances if instance.exception is not None] @@ -305,18 +324,6 @@ if __name__ == '__main__': 'AWS_SECRET_ACCESS_KEY': "dummysecret" } - # backend_a = [ - # Instance(BRIG, 8282), - # Instance(GALLEY, 8285), - # Instance(GUNDECK, 8286), - # Instance(CANNON, 8283), - # Instance(CANNON, 8383), - # Instance(CARGOHOLD, 8284), - # Instance(SPAR, 8288), - # FederatorInstance(8297, 8298), - # NginzInstance(8080) - # ] - backend_a = [ Instance(BRIG, 8082), Instance(GALLEY, 8085), @@ -332,12 +339,27 @@ if __name__ == '__main__': ssl_port=8443) ] + backend_b = [ + Instance(BRIG, 9082), + Instance(GALLEY, 9085), + Instance(GUNDECK, 9086), + Instance(CANNON, 9083), + Instance(CANNON, 9183), + Instance(CARGOHOLD, 9084), + Instance(SPAR, 9088), + FederatorInstance(9097, 9098), + NginzInstance( + local_port=9080, + http2_port=9090, + ssl_port=9443) + ] + check_prerequisites(set(s.service for s in backend_a)) try: instances = set() - instances |= start_backend(backend_a) - # instances |= start_backend(backend_b) + instances |= start_backend(backend_a, "test", "example.com") + instances |= start_backend(backend_b, "test2", "b.example.com") # main script ret = subprocess.run(sys.argv[1:]) From 468544997741b3dcc0e455e6e71fabf395e656dd Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Mon, 6 Feb 2023 16:22:12 +0100 Subject: [PATCH 12/29] Add domains to logs and nginx upstreams --- services/run-services | 52 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/services/run-services b/services/run-services index 0d0b2a939f..620b34c758 100755 --- a/services/run-services +++ b/services/run-services @@ -114,11 +114,11 @@ class Instance: except urllib.error.URLError: return False - def spawn(self, service_map, environment, keyspace, domain): + def spawn(self, service_map, environment, keyspace, domain, backend_name): try: config_file = self.modified_config_file(service_map, keyspace, domain) sub = self.service.spawn(config_file, environment) - t = threading.Thread(target=lambda: color_output(sub, self.service)) + t = threading.Thread(target=lambda: color_output(sub, self.service, backend_name)) t.start() return Instance(self.service, self.port, t, sub) except Exception as e: @@ -166,6 +166,16 @@ class Instance: if 'saml' in data: data['saml']['spPort'] = self.port +class DummyInstance(Instance): + def spawn(self, service_map, environment, keyspace, domain, backend_name): + return self + + def modified_config_file(self, service_map, keyspace, domain): + return "" + + def check_status(self): + return True + class FederatorInstance(Instance): def __init__(self, internal_port, external_port): self.external_port = external_port @@ -177,9 +187,10 @@ class FederatorInstance(Instance): data['federatorExternal']['port'] = self.external_port class NginzInstance(Instance): - def __init__(self, local_port, http2_port, ssl_port): + def __init__(self, local_port, http2_port, ssl_port, fed_port): self.http2_port = http2_port self.ssl_port = ssl_port + self.fed_port = fed_port super().__init__(NGINZ, local_port) def modified_config_file(self, service_map, keyspace, domain): @@ -200,6 +211,18 @@ class NginzInstance(Instance): listen [::]:{self.ssl_port} ssl http2;""" print(override, file=f) + # override upstreams + with open(os.path.join(self.tmpdir.name, "upstreams"), 'w') as f: + for service, port in service_map.items(): + print(f"upstream {service.internal_name} {{", file=f) + print(f" least_conn;", file=f) + print(f" keepalive 32;", file=f) + print(f" server 127.0.0.1:{port} max_fails=3 weight=1;", file=f) + print("}", file=f) + print("upstream federator_external {", file=f) + print(f" server 127.0.0.1:{self.fed_port} max_fails=3 weight=1;", file=f) + print("}", file=f) + # override pid configuration with open(os.path.join(self.tmpdir.name, "pid.conf"), 'w') as f: pid = os.path.join(self.tmpdir.name, "nginz.pid") @@ -223,11 +246,13 @@ def check_prerequisites(services): logging.error(Colors.RED + str(e) + Colors.RESET) sys.exit(1) -def color_output(sub, service): +def color_output(sub, service, backend_name): + if backend_name is not None: + backend_name = "@" + backend_name # TODO: add backend name to the output try: for line in sub.stdout: - logging.info(f"{service.color}[{service.name}] {line.rstrip()}{Colors.RESET}") + logging.info(f"{service.color}[{service.name}{backend_name}] {line.rstrip()}{Colors.RESET}") finally: sub.terminate() sub.wait() @@ -265,13 +290,13 @@ def cleanup_instances(instances): instance.process.send_signal(signal.SIGKILl) instance.thread.join() -def start_backend(services, keyspace, domain): +def start_backend(services, keyspace, domain, backend_name): # build a service map by choosing an arbitrary instance of each service service_map = dict((s.service, s.port) for s in services) instances = set() for blueprint in services: - instances.add(blueprint.spawn(service_map, environment, keyspace, domain)) + instances.add(blueprint.spawn(service_map, environment, keyspace, domain, backend_name)) failed_instances = [instance for instance in instances if instance.exception is not None] @@ -309,6 +334,7 @@ SPAR = Service("spar", Colors.ORANGE) FEDERATOR = Service("federator", Colors.BLUE, "federatorInternal", check_status=False) STERN = Service("stern", Colors.YELLOW) +PROXY = Service("proxy", Colors.RED) NGINZ = Nginz(Colors.PURPLEISH) if __name__ == '__main__': @@ -332,11 +358,13 @@ if __name__ == '__main__': Instance(CANNON, 8183), Instance(CARGOHOLD, 8084), Instance(SPAR, 8088), + DummyInstance(PROXY, 8087), FederatorInstance(8097, 8098), NginzInstance( local_port=8080, http2_port=8090, - ssl_port=8443) + ssl_port=8443, + fed_port=8098) ] backend_b = [ @@ -347,19 +375,21 @@ if __name__ == '__main__': Instance(CANNON, 9183), Instance(CARGOHOLD, 9084), Instance(SPAR, 9088), + DummyInstance(PROXY, 9087), FederatorInstance(9097, 9098), NginzInstance( local_port=9080, http2_port=9090, - ssl_port=9443) + ssl_port=9443, + fed_port=9098) ] check_prerequisites(set(s.service for s in backend_a)) try: instances = set() - instances |= start_backend(backend_a, "test", "example.com") - instances |= start_backend(backend_b, "test2", "b.example.com") + instances |= start_backend(backend_a, "test", "example.com", "A") + instances |= start_backend(backend_b, "test2", "b.example.com", "B") # main script ret = subprocess.run(sys.argv[1:]) From b508395ec79e3895074daa55e56dcc6b63f400fc Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 10:00:59 +0100 Subject: [PATCH 13/29] Avoid adding Host header twice to fed requests --- services/federator/src/Federator/Remote.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/federator/src/Federator/Remote.hs b/services/federator/src/Federator/Remote.hs index 1ea810df18..a978c4a14d 100644 --- a/services/federator/src/Federator/Remote.hs +++ b/services/federator/src/Federator/Remote.hs @@ -123,7 +123,9 @@ interpretRemote = interpret $ \case let path = LBS.toStrict . toLazyByteString $ HTTP.encodePathSegments ["federation", componentName component, rpc] - req' = HTTP2.requestBuilder HTTP.methodPost path headers body + -- filter out Host header, because the HTTP2 client adds it back + headers' = filter ((/= "Host") . fst) headers + req' = HTTP2.requestBuilder HTTP.methodPost path headers' body tlsConfig = mkTLSConfig settings hostname port resp <- mapError (RemoteError target) . (fromEither @FederatorClientHTTP2Error =<<) . embed $ From 67e0ffe04bfa7d66690e457bd730357c2396a91c Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 10:36:07 +0100 Subject: [PATCH 14/29] Add DNS entries for local nginx instances --- deploy/dockerephemeral/coredns-config/db.example.com | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deploy/dockerephemeral/coredns-config/db.example.com b/deploy/dockerephemeral/coredns-config/db.example.com index 0f33e57975..39beb50578 100644 --- a/deploy/dockerephemeral/coredns-config/db.example.com +++ b/deploy/dockerephemeral/coredns-config/db.example.com @@ -12,5 +12,6 @@ $ORIGIN example.com. www IN A 127.0.0.1 IN AAAA ::1 -_wire-server-federator._tcp IN SRV 0 0 9443 localhost. +_wire-server-federator._tcp IN SRV 0 0 8443 localhost. +_wire-server-federator._tcp.b IN SRV 0 0 9443 localhost. From d52746bfaccdb6d22bb3d87f73e3a1720f996f29 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 10:36:44 +0100 Subject: [PATCH 15/29] Set log level to Warn on all services --- services/brig/brig.integration.yaml | 2 -- services/cannon/cannon.integration.yaml | 2 +- services/cargohold/cargohold.integration.yaml | 2 +- services/federator/federator.integration.yaml | 2 +- services/federator/src/Federator/Error.hs | 3 ++- services/galley/galley.integration.yaml | 2 +- services/gundeck/gundeck.integration.yaml | 2 +- services/run-services | 3 +-- services/spar/spar.integration.yaml | 2 +- 9 files changed, 9 insertions(+), 11 deletions(-) diff --git a/services/brig/brig.integration.yaml b/services/brig/brig.integration.yaml index 41b4287f9f..f38e8507c1 100644 --- a/services/brig/brig.integration.yaml +++ b/services/brig/brig.integration.yaml @@ -195,6 +195,4 @@ optSettings: setEnableMLS: true logLevel: Warn -# ^ NOTE: We log too much in brig, if we set this to Info like other services, running tests -# produces too many logs, hence this is set to Warn. logNetStrings: false diff --git a/services/cannon/cannon.integration.yaml b/services/cannon/cannon.integration.yaml index 5bd5d2a706..f64f3c104f 100644 --- a/services/cannon/cannon.integration.yaml +++ b/services/cannon/cannon.integration.yaml @@ -21,5 +21,5 @@ drainOpts: millisecondsBetweenBatches: 500 minBatchSize: 5 -logLevel: Info +logLevel: Warn logNetStrings: false diff --git a/services/cargohold/cargohold.integration.yaml b/services/cargohold/cargohold.integration.yaml index d43b06f019..0f85c2b42c 100644 --- a/services/cargohold/cargohold.integration.yaml +++ b/services/cargohold/cargohold.integration.yaml @@ -24,5 +24,5 @@ settings: downloadLinkTTL: 300 # Seconds federationDomain: example.com -logLevel: Info +logLevel: Warn logNetStrings: false diff --git a/services/federator/federator.integration.yaml b/services/federator/federator.integration.yaml index 42e08d35c5..9562b697e1 100644 --- a/services/federator/federator.integration.yaml +++ b/services/federator/federator.integration.yaml @@ -14,7 +14,7 @@ galley: host: 0.0.0.0 port: 8085 -logLevel: Debug +logLevel: Warn logNetStrings: false optSettings: diff --git a/services/federator/src/Federator/Error.hs b/services/federator/src/Federator/Error.hs index 4e62a06ee1..b680212876 100644 --- a/services/federator/src/Federator/Error.hs +++ b/services/federator/src/Federator/Error.hs @@ -22,6 +22,7 @@ module Federator.Error where import qualified Data.Aeson as A +import Debug.Trace import Imports import Network.HTTP.Types.Header import qualified Network.Wai as Wai @@ -32,4 +33,4 @@ class AsWai e where waiErrorDescription :: e -> Text errorResponse :: [Header] -> Wai.Error -> Wai.Response -errorResponse hdrs e = Wai.responseLBS (Wai.code e) hdrs (A.encode e) +errorResponse hdrs e = Wai.responseLBS (Wai.code e) hdrs (traceShowId (A.encode e)) diff --git a/services/galley/galley.integration.yaml b/services/galley/galley.integration.yaml index 6a90f1b58e..87f02e7e34 100644 --- a/services/galley/galley.integration.yaml +++ b/services/galley/galley.integration.yaml @@ -73,7 +73,7 @@ settings: status: disabled lockStatus: locked -logLevel: Info +logLevel: Warn logNetStrings: false journal: # if set, journals; if not set, disables journaling diff --git a/services/gundeck/gundeck.integration.yaml b/services/gundeck/gundeck.integration.yaml index 5457469af0..1d56cfdf88 100644 --- a/services/gundeck/gundeck.integration.yaml +++ b/services/gundeck/gundeck.integration.yaml @@ -37,5 +37,5 @@ settings: hard: 30 # more than this number of threads will not be allowed soft: 10 # more than this number of threads will be warned about -logLevel: Info +logLevel: Warn logNetStrings: false diff --git a/services/run-services b/services/run-services index 620b34c758..0c68c5213f 100755 --- a/services/run-services +++ b/services/run-services @@ -249,7 +249,6 @@ def check_prerequisites(services): def color_output(sub, service, backend_name): if backend_name is not None: backend_name = "@" + backend_name - # TODO: add backend name to the output try: for line in sub.stdout: logging.info(f"{service.color}[{service.name}{backend_name}] {line.rstrip()}{Colors.RESET}") @@ -355,7 +354,7 @@ if __name__ == '__main__': Instance(GALLEY, 8085), Instance(GUNDECK, 8086), Instance(CANNON, 8083), - Instance(CANNON, 8183), + Instance(CANNON, 8183), # use the other configuration file for this Instance(CARGOHOLD, 8084), Instance(SPAR, 8088), DummyInstance(PROXY, 8087), diff --git a/services/spar/spar.integration.yaml b/services/spar/spar.integration.yaml index 77792b4ee1..6a1eb2f398 100644 --- a/services/spar/spar.integration.yaml +++ b/services/spar/spar.integration.yaml @@ -1,6 +1,6 @@ saml: version: SAML2.0 - logLevel: Info + logLevel: Warn spHost: 0.0.0.0 spPort: 8088 From ca62d72f56f1cb80a8b7a819260a09682dcd33f8 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 10:52:28 +0100 Subject: [PATCH 16/29] Fix cannon port in integration configuration --- services/integration.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/integration.yaml b/services/integration.yaml index 97adbc1e48..20e002ec88 100644 --- a/services/integration.yaml +++ b/services/integration.yaml @@ -88,7 +88,7 @@ backendTwo: port: 9084 cannon: host: 127.0.0.1 - port: 9086 + port: 9083 redis2: host: 127.0.0.1 From aaddba70d3ce39fc16fc39c3c834f1d9ce5e2571 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 11:13:34 +0100 Subject: [PATCH 17/29] Set log level dynamically for all services --- services/run-services | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/services/run-services b/services/run-services index 0c68c5213f..63677c9ba0 100755 --- a/services/run-services +++ b/services/run-services @@ -36,6 +36,12 @@ class Service: color: str _internal_name: str = None check_status: bool = True + level: str = None + + def with_level(self, level=None): + if level is None: + level = os.environ.get("INTEGRATION_{self.name.capitalize()}_LEVEL") + return replace(self, level=level) @property def internal_name(self): @@ -67,6 +73,7 @@ class Service: @dataclass(frozen=True) class Nginz: color: str + level: str = None @property def name(self): return "nginz" @@ -156,6 +163,13 @@ class Instance: elif 'settings' in data: data['settings']['federationDomain'] = domain + # set log level + if self.service.level is not None: + if 'logLevel' in data: + data['logLevel'] = self.service.level + elif 'saml' in data: + data['saml']['logLevel'] = self.service.level + self.set_own_port(data) # write modified config file to pipe @@ -324,16 +338,18 @@ def start_backend(services, keyspace, domain, backend_name): return instances -BRIG = Service("brig", Colors.GREEN) -GALLEY = Service("galley", Colors.YELLOW) -GUNDECK = Service("gundeck", Colors.BLUE) -CANNON = Service("cannon", Colors.ORANGE) -CARGOHOLD = Service("cargohold", Colors.PURPLEISH) -SPAR = Service("spar", Colors.ORANGE) +level = os.environ.get("INTEGRATION_LEVEL") +BRIG = Service("brig", Colors.GREEN).with_level(level) +GALLEY = Service("galley", Colors.YELLOW).with_level(level) +GUNDECK = Service("gundeck", Colors.BLUE).with_level(level) +CANNON = Service("cannon", Colors.ORANGE).with_level(level) +CARGOHOLD = Service("cargohold", Colors.PURPLEISH).with_level(level) +SPAR = Service("spar", Colors.ORANGE).with_level(level) FEDERATOR = Service("federator", Colors.BLUE, - "federatorInternal", check_status=False) -STERN = Service("stern", Colors.YELLOW) -PROXY = Service("proxy", Colors.RED) + "federatorInternal", + check_status=False).with_level(level) +STERN = Service("stern", Colors.YELLOW).with_level(level) +PROXY = Service("proxy", Colors.RED).with_level(level) NGINZ = Nginz(Colors.PURPLEISH) if __name__ == '__main__': From e2d494db721d706a335a55f99be17ef64ccc4931 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 13:51:00 +0100 Subject: [PATCH 18/29] Pass environment through to the main script --- services/run-services | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/run-services b/services/run-services index 63677c9ba0..464195cef7 100755 --- a/services/run-services +++ b/services/run-services @@ -407,7 +407,9 @@ if __name__ == '__main__': instances |= start_backend(backend_b, "test2", "b.example.com", "B") # main script - ret = subprocess.run(sys.argv[1:]) + ret = subprocess.run(sys.argv[1:], + env=dict(list(os.environ.items()) + + list(environment.items()))) sys.exit(ret.returncode) except KeyboardInterrupt: pass From 13d29ec2545dd99946b08121556fdaff787858fa Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 14:35:06 +0100 Subject: [PATCH 19/29] Add service spawn timeout --- services/run-services | 59 ++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/services/run-services b/services/run-services index 464195cef7..972b626828 100755 --- a/services/run-services +++ b/services/run-services @@ -37,6 +37,7 @@ class Service: _internal_name: str = None check_status: bool = True level: str = None + config: str = None def with_level(self, level=None): if level is None: @@ -54,8 +55,12 @@ class Service: return os.path.join(ROOT, "dist", self.name) def config_file(self): + if self.config is None: + base = self.name + else: + base = self.config return os.path.join(ROOT, "services", self.name, - self.name + ".integration.yaml") + base + ".integration.yaml") def spawn(self, config_file, environment): @@ -313,26 +318,40 @@ def start_backend(services, keyspace, domain, backend_name): failed_instances = [instance for instance in instances if instance.exception is not None] - to_be_checked = set(instance for instance in instances - if instance.exception is None) - # check status - # TODO: global timeout + # set up a timeout for checking instances + timeout = threading.Condition() + checking = True + def check_timeout(): + nonlocal checking + time.sleep(5) + with timeout: + checking = False + timeout.notify() + threading.Thread(target=check_timeout).start() + + # check instances + to_be_checked = [instance for instance in instances + if instance.exception is None] while to_be_checked: - instance = next(iter(to_be_checked)) - try: - done = instance.check_status() - except Exception as e: - failed_instances.append(replace(instance, exception=e)) - done = True - - if done: - to_be_checked.remove(instance) - else: - time.sleep(0.1) + with timeout: + if not checking: + print(f"{Colors.RED}Timeout while spawing services{Colors.RESET}") + failed_instances.extend(to_be_checked) + break + + to_be_checked_again = set() + for instance in to_be_checked: + try: + if not instance.check_status(): + to_be_checked_again.add(instance) + except Exception as e: + failed_instances.append(replace(instance, exception=e)) + + to_be_checked = to_be_checked_again + time.sleep(0.5) if failed_instances: - cleanup_instances(instances) raise SpawnFailException(failed_instances) @@ -370,7 +389,7 @@ if __name__ == '__main__': Instance(GALLEY, 8085), Instance(GUNDECK, 8086), Instance(CANNON, 8083), - Instance(CANNON, 8183), # use the other configuration file for this + # TODO: add a second cannon instance using the other configuration file Instance(CARGOHOLD, 8084), Instance(SPAR, 8088), DummyInstance(PROXY, 8087), @@ -387,7 +406,6 @@ if __name__ == '__main__': Instance(GALLEY, 9085), Instance(GUNDECK, 9086), Instance(CANNON, 9083), - Instance(CANNON, 9183), Instance(CARGOHOLD, 9084), Instance(SPAR, 9088), DummyInstance(PROXY, 9087), @@ -416,6 +434,7 @@ if __name__ == '__main__': except SpawnFailException as e: print(f"{Colors.RED}The following services failed to start:{Colors.RESET}") for instance in e.failed_instances: - print(f"{instance.service.name} at port {instance.port} ({instance.exception})") + print(f"{instance.service.name} at port {instance.port}" + + (f" ({instance.exception})" if instance.exception else "")) finally: cleanup_instances(instances) From 044e94b1cbd145241b244a673623baffca0d1df5 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 15:28:08 +0100 Subject: [PATCH 20/29] Add a second cannon instance --- services/run-services | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/services/run-services b/services/run-services index 972b626828..e9b40ec3fe 100755 --- a/services/run-services +++ b/services/run-services @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from dataclasses import dataclass, replace +import logging import os import re import select @@ -14,8 +15,8 @@ import urllib.error import sys import tempfile import time +import traceback import threading -import logging @dataclass class SpawnFailException(Exception): @@ -144,19 +145,13 @@ class Instance: file to it, and returning a path to the read end of the pipe (in /proc).""" - def get_port(service): - if service is self.service: - return self.port - else: - return service_map[service] - with open(self.service.config_file()) as f: data = yaml.safe_load(f) # set ports of other services for service in service_map: if service.internal_name in data: - data[service.internal_name]['port'] = get_port(service) + data[service.internal_name]['port'] = service_map[service] # set cassandra keyspace if 'cassandra' in data: @@ -182,8 +177,10 @@ class Instance: def set_own_port(self, data): # spar's own port is in a different place - if 'saml' in data: + if self.service.name == 'spar': data['saml']['spPort'] = self.port + elif self.service.name in data: + data[self.service.name]['port'] = self.port class DummyInstance(Instance): def spawn(self, service_map, environment, keyspace, domain, backend_name): @@ -349,7 +346,7 @@ def start_backend(services, keyspace, domain, backend_name): failed_instances.append(replace(instance, exception=e)) to_be_checked = to_be_checked_again - time.sleep(0.5) + time.sleep(0.05) if failed_instances: cleanup_instances(instances) @@ -362,6 +359,8 @@ BRIG = Service("brig", Colors.GREEN).with_level(level) GALLEY = Service("galley", Colors.YELLOW).with_level(level) GUNDECK = Service("gundeck", Colors.BLUE).with_level(level) CANNON = Service("cannon", Colors.ORANGE).with_level(level) +CANNON2 = Service("cannon", Colors.ORANGE, + "cannon2", config="cannon2").with_level(level) CARGOHOLD = Service("cargohold", Colors.PURPLEISH).with_level(level) SPAR = Service("spar", Colors.ORANGE).with_level(level) FEDERATOR = Service("federator", Colors.BLUE, @@ -389,7 +388,7 @@ if __name__ == '__main__': Instance(GALLEY, 8085), Instance(GUNDECK, 8086), Instance(CANNON, 8083), - # TODO: add a second cannon instance using the other configuration file + Instance(CANNON2, 8183), Instance(CARGOHOLD, 8084), Instance(SPAR, 8088), DummyInstance(PROXY, 8087), @@ -406,6 +405,7 @@ if __name__ == '__main__': Instance(GALLEY, 9085), Instance(GUNDECK, 9086), Instance(CANNON, 9083), + Instance(CANNON2, 9183), Instance(CARGOHOLD, 9084), Instance(SPAR, 9088), DummyInstance(PROXY, 9087), From 80449a67dc0c60851088aafcabaa49004c76616b Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 15:49:15 +0100 Subject: [PATCH 21/29] Honour INTEGRATION_FEDERATION_TESTS variable --- Makefile | 14 +++++++++++++- services/run-services | 26 ++++++++++++++------------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 840ce6f0fd..bc497201d1 100644 --- a/Makefile +++ b/Makefile @@ -249,8 +249,17 @@ ifeq ($(package), all) ./dist/galley-schema --keyspace galley_test --replication-factor 1 --reset ./dist/gundeck-schema --keyspace gundeck_test --replication-factor 1 --reset ./dist/spar-schema --keyspace spar_test --replication-factor 1 --reset +ifeq ($(INTEGRATION_FEDERATION_TESTS), 1) + ./dist/brig-schema --keyspace brig_test2 --replication-factor 1 --reset + ./dist/galley-schema --keyspace galley_test2 --replication-factor 1 --reset + ./dist/gundeck-schema --keyspace gundeck_test2 --replication-factor 1 --reset + ./dist/spar-schema --keyspace spar_test2 --replication-factor 1 --reset +endif else $(EXE_SCHEMA) --keyspace $(package)_test --replication-factor 1 --reset +ifeq ($(INTEGRATION_FEDERATION_TESTS), 1) + $(EXE_SCHEMA) --keyspace $(package)_test2 --replication-factor 1 --reset +endif endif # Usage: @@ -267,15 +276,18 @@ ifeq ($(package), all) ./dist/galley-schema --keyspace galley_test --replication-factor 1 ./dist/gundeck-schema --keyspace gundeck_test --replication-factor 1 ./dist/spar-schema --keyspace spar_test --replication-factor 1 - +ifeq ($(INTEGRATION_FEDERATION_TESTS), 1) ./dist/brig-schema --keyspace brig_test2 --replication-factor 1 ./dist/galley-schema --keyspace galley_test2 --replication-factor 1 ./dist/gundeck-schema --keyspace gundeck_test2 --replication-factor 1 ./dist/spar-schema --keyspace spar_test2 --replication-factor 1 +endif # How this check works: https://stackoverflow.com/a/9802777 else ifeq ($(package), $(filter $(package),brig galley gundeck spar)) $(EXE_SCHEMA) --keyspace $(package)_test --replication-factor 1 +ifeq ($(INTEGRATION_FEDERATION_TESTS), 1) $(EXE_SCHEMA) --keyspace $(package)_test2 --replication-factor 1 +endif else @echo No schema migrations for $(package) endif diff --git a/services/run-services b/services/run-services index e9b40ec3fe..63c7b24a78 100755 --- a/services/run-services +++ b/services/run-services @@ -354,20 +354,21 @@ def start_backend(services, keyspace, domain, backend_name): return instances -level = os.environ.get("INTEGRATION_LEVEL") -BRIG = Service("brig", Colors.GREEN).with_level(level) -GALLEY = Service("galley", Colors.YELLOW).with_level(level) -GUNDECK = Service("gundeck", Colors.BLUE).with_level(level) -CANNON = Service("cannon", Colors.ORANGE).with_level(level) +ENABLE_FEDERATION = os.environ.get("INTEGRATION_FEDERATION_TESTS") == "1" +LEVEL = os.environ.get("INTEGRATION_LEVEL") +BRIG = Service("brig", Colors.GREEN).with_level(LEVEL) +GALLEY = Service("galley", Colors.YELLOW).with_level(LEVEL) +GUNDECK = Service("gundeck", Colors.BLUE).with_level(LEVEL) +CANNON = Service("cannon", Colors.ORANGE).with_level(LEVEL) CANNON2 = Service("cannon", Colors.ORANGE, - "cannon2", config="cannon2").with_level(level) -CARGOHOLD = Service("cargohold", Colors.PURPLEISH).with_level(level) -SPAR = Service("spar", Colors.ORANGE).with_level(level) + "cannon2", config="cannon2").with_level(LEVEL) +CARGOHOLD = Service("cargohold", Colors.PURPLEISH).with_level(LEVEL) +SPAR = Service("spar", Colors.ORANGE).with_level(LEVEL) FEDERATOR = Service("federator", Colors.BLUE, "federatorInternal", - check_status=False).with_level(level) -STERN = Service("stern", Colors.YELLOW).with_level(level) -PROXY = Service("proxy", Colors.RED).with_level(level) + check_status=False).with_level(LEVEL) +STERN = Service("stern", Colors.YELLOW).with_level(LEVEL) +PROXY = Service("proxy", Colors.RED).with_level(LEVEL) NGINZ = Nginz(Colors.PURPLEISH) if __name__ == '__main__': @@ -422,7 +423,8 @@ if __name__ == '__main__': try: instances = set() instances |= start_backend(backend_a, "test", "example.com", "A") - instances |= start_backend(backend_b, "test2", "b.example.com", "B") + if ENABLE_FEDERATION: + instances |= start_backend(backend_b, "test2", "b.example.com", "B") # main script ret = subprocess.run(sys.argv[1:], From 220e368d060790549ade56ea641241ae50eedf23 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 16:14:13 +0100 Subject: [PATCH 22/29] Remove old integration.sh script --- charts/gundeck/templates/tests/configmap.yaml | 2 +- hack/bin/cabal-run-integration.sh | 1 - services/integration.sh | 171 ------------------ services/start-services-only.sh | 2 +- tools/stern/src/Stern/Intra.hs | 2 +- treefmt.toml | 2 +- 6 files changed, 4 insertions(+), 176 deletions(-) delete mode 100755 services/integration.sh diff --git a/charts/gundeck/templates/tests/configmap.yaml b/charts/gundeck/templates/tests/configmap.yaml index 6829860247..c5df36067c 100644 --- a/charts/gundeck/templates/tests/configmap.yaml +++ b/charts/gundeck/templates/tests/configmap.yaml @@ -19,7 +19,7 @@ data: # some gundeck integration tests make use of two different # cannon instances to test the distributed case. when running # the integration tests locally, the two instances will be spun - # up separately (see `wire-server/services/integration.sh`). + # up separately (see `wire-server/services/run-services`). # # here, we spin up two replicas, provide the integration tests # with the same service coordinates, and rely on the k8s load diff --git a/hack/bin/cabal-run-integration.sh b/hack/bin/cabal-run-integration.sh index 758855e4b6..f92a8c5284 100755 --- a/hack/bin/cabal-run-integration.sh +++ b/hack/bin/cabal-run-integration.sh @@ -46,7 +46,6 @@ run_integration_tests() { service_dir="$TOP_LEVEL/services/$package" cd "$service_dir" - # "$TOP_LEVEL/services/integration.sh" \ "/usr/bin/python3" "$TOP_LEVEL/services/run-services" \ "$TOP_LEVEL/dist/$package-integration" \ -s "$service_dir/$package.integration.yaml" \ diff --git a/services/integration.sh b/services/integration.sh deleted file mode 100755 index a0ce189476..0000000000 --- a/services/integration.sh +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env bash -set -eo pipefail - -USAGE="$0 [args...]" -EXE=${1:?$USAGE} -TOP_LEVEL="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )" -DIR="${TOP_LEVEL}/services" -PARENT_PID=$$ -rm -f /tmp/integration.* # remove previous temp files, if any -EXIT_STATUS_LOCATION=$(mktemp "/tmp/integration.XXXXXXXXXXX") -echo 1 >"${EXIT_STATUS_LOCATION}" - -function kill_all() { - # kill the process tree of the PARENT_PID - kill -9 -${PARENT_PID} &> /dev/null -} - -function list_descendants () { - local children - children="$(pgrep -P "$1")" - for pid in $children - do - list_descendants "$pid" - done - echo "$children" -} - -function kill_gracefully() { - pkill "federator|gundeck|brig|galley|cargohold|cannon|spar|nginz|stern" - sleep 1 - kill $(list_descendants "$PARENT_PID") &> /dev/null -} - -trap "kill_gracefully; kill_all" INT TERM ERR - -function check_prerequisites() { - if ! ( nc -z 127.0.0.1 9042 \ - && nc -z 127.0.0.1 9200 \ - && nc -z 127.0.0.1 6379 ); then - echo "Databases not up. Maybe run 'deploy/dockerephemeral/run.sh' in a separate terminal first?"; exit 1; - fi - if [ ! -f "${TOP_LEVEL}/dist/brig" ] \ - && [ ! -f "${TOP_LEVEL}/dist/galley" ] \ - && [ ! -f "${TOP_LEVEL}/dist/cannon" ] \ - && [ ! -f "${TOP_LEVEL}/dist/gundeck" ] \ - && [ ! -f "${TOP_LEVEL}/dist/cargohold" ] \ - && [ ! -f "${TOP_LEVEL}/dist/stern" ] \ - && [ ! -f "${TOP_LEVEL}/dist/spar" ]; then - echo "Not all services are compiled. How about you run 'cd ${TOP_LEVEL} && make' first?"; exit 1; - fi -} - -blue=6 -green=10 -orange=3 -yellow=11 -purpleish=13 - -if [[ $INTEGRATION_USE_REAL_AWS -eq 1 ]]; then - echo 'Attempting to run integration tests using real AWS services!' - [ -z "$AWS_REGION" ] && echo "Need to set AWS_REGION in your environment" && exit 1; - [ -z "$AWS_ACCESS_KEY_ID" ] && echo "Need to set AWS_ACCESS_KEY_ID in your environment" && exit 1; - [ -z "$AWS_SECRET_ACCESS_KEY" ] && echo "Need to set AWS_SECRET_ACCESS_KEY in your environment" && exit 1; - "${TOP_LEVEL}"/services/gen-aws-conf.sh - integration_file_extension='-aws.yaml' -elif [[ $INTEGRATION_CARGOHOLD_ONLY_COMPAT -eq 1 ]]; then - echo "Running tests using specific S3 buckets for cargohold using folder $CARGOHOLD_COMPAT_CONFIG_FOLDER" - if [ ! -f "${CARGOHOLD_COMPAT_CONFIG_FOLDER}/env.sh" ] \ - && [ ! -f "${CARGOHOLD_COMPAT_CONFIG_FOLDER}/cargohold.integration.yaml" ]; then - echo 'expecting a CARGOHOLD_COMPAT_CONFIG_FOLDER/cargohold.integration.yaml and' - echo 'expecting a CARGOHOLD_COMPAT_CONFIG_FOLDER/env.sh' - exit 1; - fi -else - # brig,gundeck,galley use the amazonka library's 'Discover', which expects AWS credentials - # even if those are not used/can be dummy values with the fake sqs/ses/etc containers used - # (see deploy/dockerephemeral/docker-compose.yaml ) - echo 'Running tests using mocked AWS services' - export AWS_REGION=eu-west-1 - export AWS_ACCESS_KEY_ID=dummykey - export AWS_SECRET_ACCESS_KEY=dummysecret - integration_file_extension='.yaml' -fi - -function run() { - service=$1 - instance=$2 - colour=$3 - configfile=${4:-"${service}${instance}.integration${integration_file_extension}"} - # Check if we're on a Mac - if [[ "$OSTYPE" == "darwin"* ]]; then - # Mac sed uses '-l' to set line-by-line buffering - UNBUFFERED=-l - # Test if sed supports buffer settings. GNU sed does, busybox does not. - elif sed -u '' /dev/null 2>&1; then - UNBUFFERED=-u - else - echo -e "\n\nWARNING: log output is buffered and may not show on your screen!\n\n" - UNBUFFERED='' - fi - ( ( cd "${DIR}/${service}" && "${TOP_LEVEL}/dist/${service}" -c "${configfile}" ) || kill_all) \ - | sed ${UNBUFFERED} -e "s/^/$(tput setaf ${colour})[${service}] /" -e "s/$/$(tput sgr0)/" & -} - - -if [[ $INTEGRATION_CARGOHOLD_ONLY_COMPAT -eq 1 ]]; then - source "${CARGOHOLD_COMPAT_CONFIG_FOLDER}/env.sh" - echo run cargohold "" ${purpleish} "${CARGOHOLD_COMPAT_CONFIG_FOLDER}/cargohold.integration.yaml" - run cargohold "" ${purpleish} "${CARGOHOLD_COMPAT_CONFIG_FOLDER}/cargohold.integration.yaml" -else - check_prerequisites - run brig "" ${green} - run galley "" ${yellow} - run gundeck "" ${blue} - run cannon "" ${orange} - run cannon "2" ${orange} - run cargohold "" ${purpleish} - run spar "" ${orange} - run federator "" ${blue} - run stern "" ${yellow} -fi - -function run_nginz() { - colour=$1 - - if [[ ! ${COMPILE_NGINX_USING_NIX:-1} -eq 0 ]]; then - # For nix we don't need LD_LIBRARY_PATH; we link against libzauth directly. - nginz=$(nix-build "${TOP_LEVEL}/nix" -A pkgs.nginz --no-out-link ) - (cd ${NGINZ_WORK_DIR} && ${nginz}/bin/nginx -p ${NGINZ_WORK_DIR} -c ${NGINZ_WORK_DIR}/conf/nginz/nginx.conf -g 'daemon off;' || kill_all) \ - | sed -e "s/^/$(tput setaf ${colour})[nginz] /" -e "s/$/$(tput sgr0)/" & - else - prefix=$([ -w /usr/local ] && echo /usr/local || echo "${HOME}/.wire-dev") - - (cd ${NGINZ_WORK_DIR} && LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${prefix}/lib/ ${TOP_LEVEL}/dist/nginx -p ${NGINZ_WORK_DIR} -c ${NGINZ_WORK_DIR}/conf/nginz/nginx.conf -g 'daemon off;' || kill_all) \ - | sed -e "s/^/$(tput setaf ${colour})[nginz] /" -e "s/$/$(tput sgr0)/" & - fi -} - -NGINZ_PORT="" - -if [[ ! ${INTEGRATION_USE_NGINZ:-1} -eq 0 ]]; then - NGINZ_PORT=8080 - # Note: for integration tests involving nginz, - # nginz and brig must share the same zauth public/private keys - export NGINZ_WORK_DIR="$TOP_LEVEL/services/nginz/integration-test" - - run_nginz ${purpleish} -fi - -# the ports are copied from ./integration.yaml -if [[ $INTEGRATION_CARGOHOLD_ONLY_COMPAT -eq 1 ]]; then - PORT_LIST="8084" -else - PORT_LIST="8082 8083 8084 8085 8086 8088 $NGINZ_PORT" -fi - -while [ "$all_services_are_up" == "" ]; do - export all_services_are_up="1" - for port in $PORT_LIST; do - ( curl --write-out '%{http_code}' --silent --output /dev/null http://localhost:"$port"/i/status \ - | grep -q '^20[04]' ) \ - || export all_services_are_up="" - done - sleep 1 -done -echo "all services are up!" - -( ${EXE} "${@:2}" && echo 0 > "${EXIT_STATUS_LOCATION}" && kill_gracefully ) || kill_gracefully & - -wait -exit $(<"${EXIT_STATUS_LOCATION}") diff --git a/services/start-services-only.sh b/services/start-services-only.sh index c445f8f21c..db1bd2b96f 100755 --- a/services/start-services-only.sh +++ b/services/start-services-only.sh @@ -7,5 +7,5 @@ set -eo pipefail SERVICES_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -# call integration.sh, show a message, then sleep (instead of executing a test executable) +# call run-services, show a message, then sleep (instead of executing a test executable) "/usr/bin/python3" "$SERVICES_DIR/run-services" bash -c 'printf "(This will hang, Control+C to close.)\nNow you can manually curl them or start an integration test executable manually with e.g. \n(first cd to a service dir for correct working directory)\n cd services/brig && ../../dist/brig-integration -s brig.integration.yaml -i ../integration.yaml\n" && sleep 1000000' diff --git a/tools/stern/src/Stern/Intra.hs b/tools/stern/src/Stern/Intra.hs index c27ef36829..0ca2c9dd38 100644 --- a/tools/stern/src/Stern/Intra.hs +++ b/tools/stern/src/Stern/Intra.hs @@ -122,7 +122,7 @@ backendApiVersion :: Version backendApiVersion = V2 -- | Make sure the backend supports `backendApiVersion`. Crash if it doesn't. (This is called --- in `Stern.API` so problems make `./services/integration.sh` crash.) +-- in `Stern.API` so problems make `./services/run-service` crash.) assertBackendApiVersion :: App () assertBackendApiVersion = recoverAll (constantDelay 1000000 <> limitRetries 5) $ \_retryStatus -> do b <- view brig diff --git a/treefmt.toml b/treefmt.toml index 2984c8f027..6dd52aa914 100644 --- a/treefmt.toml +++ b/treefmt.toml @@ -33,7 +33,7 @@ excludes = [ "services/spar/test-scim-suite/runsuite.sh", "services/spar/test-scim-suite/run.sh", "services/brig/federation-tests.sh", - "services/integration.sh", + "services/run-services", "hack/bin/create_test_team_members.sh", "hack/bin/create_test_team_scim.sh", "hack/bin/create_test_user.sh", From 0b1c704b4009f6c33cd04c42070590b13e9d3d1a Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Tue, 7 Feb 2023 16:16:30 +0100 Subject: [PATCH 23/29] Add CHANGELOG entry --- changelog.d/5-internal/local-end2end-setup | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/5-internal/local-end2end-setup diff --git a/changelog.d/5-internal/local-end2end-setup b/changelog.d/5-internal/local-end2end-setup new file mode 100644 index 0000000000..ac38cd1546 --- /dev/null +++ b/changelog.d/5-internal/local-end2end-setup @@ -0,0 +1 @@ +New integration test script with support for running end2end tests locally From a7727858e696c1846015b3943a8757f57d360a7a Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Wed, 8 Feb 2023 09:56:12 +0100 Subject: [PATCH 24/29] Leftovers --- services/brig/test/integration/Federation/End2end.hs | 8 ++++---- services/federator/src/Federator/Error.hs | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/services/brig/test/integration/Federation/End2end.hs b/services/brig/test/integration/Federation/End2end.hs index 18808516e4..a13db52313 100644 --- a/services/brig/test/integration/Federation/End2end.hs +++ b/services/brig/test/integration/Federation/End2end.hs @@ -183,14 +183,14 @@ testGetUsersById brig1 brig2 = do testClaimPrekeySuccess :: Brig -> Brig -> Http () testClaimPrekeySuccess brig1 brig2 = do - self <- randomUser brig2 - user <- randomUser brig1 + self <- randomUser brig1 + user <- randomUser brig2 let new = defNewClient TemporaryClientType (take 1 somePrekeys) (Imports.head someLastPrekeys) - c <- responseJsonError =<< addClient brig1 (userId user) new + c <- responseJsonError =<< addClient brig2 (userId user) new let cpk = ClientPrekey (clientId c) (Imports.head somePrekeys) let quser = userQualifiedId user get - ( brig2 + ( brig1 . zUser (userId self) . paths [ "users", diff --git a/services/federator/src/Federator/Error.hs b/services/federator/src/Federator/Error.hs index b680212876..4e62a06ee1 100644 --- a/services/federator/src/Federator/Error.hs +++ b/services/federator/src/Federator/Error.hs @@ -22,7 +22,6 @@ module Federator.Error where import qualified Data.Aeson as A -import Debug.Trace import Imports import Network.HTTP.Types.Header import qualified Network.Wai as Wai @@ -33,4 +32,4 @@ class AsWai e where waiErrorDescription :: e -> Text errorResponse :: [Header] -> Wai.Error -> Wai.Response -errorResponse hdrs e = Wai.responseLBS (Wai.code e) hdrs (traceShowId (A.encode e)) +errorResponse hdrs e = Wai.responseLBS (Wai.code e) hdrs (A.encode e) From 7f493e9a2a1c2f705278d1f609f18db93c8050ad Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Wed, 8 Feb 2023 10:43:28 +0100 Subject: [PATCH 25/29] Add pyyaml to nix --- hack/bin/cabal-run-integration.sh | 2 +- nix/overlay.nix | 2 +- nix/wire-server.nix | 3 ++- services/run-services | 3 +++ services/start-services-only.sh | 2 +- 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hack/bin/cabal-run-integration.sh b/hack/bin/cabal-run-integration.sh index f92a8c5284..57cf9a2874 100755 --- a/hack/bin/cabal-run-integration.sh +++ b/hack/bin/cabal-run-integration.sh @@ -46,7 +46,7 @@ run_integration_tests() { service_dir="$TOP_LEVEL/services/$package" cd "$service_dir" - "/usr/bin/python3" "$TOP_LEVEL/services/run-services" \ + "$TOP_LEVEL/services/run-services" \ "$TOP_LEVEL/dist/$package-integration" \ -s "$service_dir/$package.integration.yaml" \ -i "$TOP_LEVEL/services/integration.yaml" \ diff --git a/nix/overlay.nix b/nix/overlay.nix index 1f20335dbb..7657680fd7 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -87,7 +87,7 @@ self: super: { inherit (super) stdenv fetchurl; }; - helm = super.callPackage ./pkgs/helm {}; + helm = super.callPackage ./pkgs/helm { }; helmfile = staticBinary { pname = "helmfile"; diff --git a/nix/wire-server.nix b/nix/wire-server.nix index c215298f8b..8fc1b22edd 100644 --- a/nix/wire-server.nix +++ b/nix/wire-server.nix @@ -372,7 +372,8 @@ in pkgs.kind pkgs.netcat pkgs.niv - pkgs.python3 + (pkgs.python3.withPackages + (ps: with ps; [ pyyaml ])) pkgs.rsync pkgs.wget pkgs.yq diff --git a/services/run-services b/services/run-services index 63c7b24a78..1ad867fa99 100755 --- a/services/run-services +++ b/services/run-services @@ -348,6 +348,9 @@ def start_backend(services, keyspace, domain, backend_name): to_be_checked = to_be_checked_again time.sleep(0.05) + # TODO: elapse timeout so that the timeout thread doesn't hold up the + # process + if failed_instances: cleanup_instances(instances) raise SpawnFailException(failed_instances) diff --git a/services/start-services-only.sh b/services/start-services-only.sh index db1bd2b96f..b7cde8153a 100755 --- a/services/start-services-only.sh +++ b/services/start-services-only.sh @@ -8,4 +8,4 @@ set -eo pipefail SERVICES_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # call run-services, show a message, then sleep (instead of executing a test executable) -"/usr/bin/python3" "$SERVICES_DIR/run-services" bash -c 'printf "(This will hang, Control+C to close.)\nNow you can manually curl them or start an integration test executable manually with e.g. \n(first cd to a service dir for correct working directory)\n cd services/brig && ../../dist/brig-integration -s brig.integration.yaml -i ../integration.yaml\n" && sleep 1000000' +"$SERVICES_DIR/run-services" bash -c 'printf "(This will hang, Control+C to close.)\nNow you can manually curl them or start an integration test executable manually with e.g. \n(first cd to a service dir for correct working directory)\n cd services/brig && ../../dist/brig-integration -s brig.integration.yaml -i ../integration.yaml\n" && sleep 1000000' From 362d10368ee2c36579ece35f9f6d67d9df27f84c Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Wed, 8 Feb 2023 10:53:09 +0100 Subject: [PATCH 26/29] Replace timeout thread with a simple time check --- services/run-services | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/services/run-services b/services/run-services index 1ad867fa99..aa7bef883a 100755 --- a/services/run-services +++ b/services/run-services @@ -316,26 +316,15 @@ def start_backend(services, keyspace, domain, backend_name): failed_instances = [instance for instance in instances if instance.exception is not None] - # set up a timeout for checking instances - timeout = threading.Condition() - checking = True - def check_timeout(): - nonlocal checking - time.sleep(5) - with timeout: - checking = False - timeout.notify() - threading.Thread(target=check_timeout).start() - # check instances to_be_checked = [instance for instance in instances if instance.exception is None] + start_time = time.time() while to_be_checked: - with timeout: - if not checking: - print(f"{Colors.RED}Timeout while spawing services{Colors.RESET}") - failed_instances.extend(to_be_checked) - break + if time.time() - start_time >= 5: + print(f"{Colors.RED}Timeout while spawing services{Colors.RESET}") + failed_instances.extend(to_be_checked) + break to_be_checked_again = set() for instance in to_be_checked: From 0d3834d6e6a6fbc4d1894df9b1e6c10b9fc3454f Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Wed, 8 Feb 2023 10:57:30 +0100 Subject: [PATCH 27/29] Move start-service-only logic to run-service --- services/run-services | 15 ++++++++++----- services/start-services-only.sh | 3 +-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/services/run-services b/services/run-services index aa7bef883a..a91cc4971c 100755 --- a/services/run-services +++ b/services/run-services @@ -418,11 +418,16 @@ if __name__ == '__main__': if ENABLE_FEDERATION: instances |= start_backend(backend_b, "test2", "b.example.com", "B") - # main script - ret = subprocess.run(sys.argv[1:], - env=dict(list(os.environ.items()) + - list(environment.items()))) - sys.exit(ret.returncode) + # run main script or just wait forever + if len(sys.argv) == 1: + print("(This will hang, Control+C to close.)") + print("Now you can manually curl them or start an integration test executable manually with e.g. \n(first cd to a service dir for correct working directory)\n cd services/brig && ../../dist/brig-integration -s brig.integration.yaml -i ../integration.yaml") + signal.pause() + else: + ret = subprocess.run(sys.argv[1:], + env=dict(list(os.environ.items()) + + list(environment.items()))) + sys.exit(ret.returncode) except KeyboardInterrupt: pass except SpawnFailException as e: diff --git a/services/start-services-only.sh b/services/start-services-only.sh index b7cde8153a..374e12f285 100755 --- a/services/start-services-only.sh +++ b/services/start-services-only.sh @@ -2,10 +2,9 @@ # Run all haskell services without immediately starting a test executable. # Can be useful for manually poking at the API. - set -eo pipefail SERVICES_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # call run-services, show a message, then sleep (instead of executing a test executable) -"$SERVICES_DIR/run-services" bash -c 'printf "(This will hang, Control+C to close.)\nNow you can manually curl them or start an integration test executable manually with e.g. \n(first cd to a service dir for correct working directory)\n cd services/brig && ../../dist/brig-integration -s brig.integration.yaml -i ../integration.yaml\n" && sleep 1000000' +exec "$SERVICES_DIR/run-services" From 64d86878c58d45bba35454f3dcd6370e2f60f89d Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Thu, 9 Feb 2023 13:36:46 +0100 Subject: [PATCH 28/29] Set up different ES indices for the two backends --- Makefile | 6 ++++-- services/brig/src/Brig/Index/Options.hs | 13 ++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 08c5577729..88e80bfe4e 100644 --- a/Makefile +++ b/Makefile @@ -290,7 +290,8 @@ ifeq ($(INTEGRATION_FEDERATION_TESTS), 1) $(EXE_SCHEMA) --keyspace $(package)_test2 --replication-factor 1 --reset endif endif - ./dist/brig-index reset --elasticsearch-server http://localhost:9200 > /dev/null + ./dist/brig-index reset --elasticsearch-index directory_test --elasticsearch-server http://localhost:9200 > /dev/null + ./dist/brig-index reset --elasticsearch-index directory_test2 --elasticsearch-server http://localhost:9200 > /dev/null # Usage: # @@ -305,13 +306,14 @@ db-migrate: c ./dist/galley-schema --keyspace galley_test --replication-factor 1 > /dev/null ./dist/gundeck-schema --keyspace gundeck_test --replication-factor 1 > /dev/null ./dist/spar-schema --keyspace spar_test --replication-factor 1 > /dev/null - ./dist/brig-index reset --elasticsearch-server http://localhost:9200 > /dev/null ifeq ($(INTEGRATION_FEDERATION_TESTS), 1) ./dist/brig-schema --keyspace brig_test2 --replication-factor 1 > /dev/null ./dist/galley-schema --keyspace galley_test2 --replication-factor 1 > /dev/null ./dist/gundeck-schema --keyspace gundeck_test2 --replication-factor 1 > /dev/null ./dist/spar-schema --keyspace spar_test2 --replication-factor 1 > /dev/null endif + ./dist/brig-index reset --elasticsearch-index-prefix directory --elasticsearch-server http://localhost:9200 > /dev/null + ./dist/brig-index reset --elasticsearch-index-prefix directory2 --elasticsearch-server http://localhost:9200 > /dev/null ################################# ## dependencies diff --git a/services/brig/src/Brig/Index/Options.hs b/services/brig/src/Brig/Index/Options.hs index c614172794..af164ff20f 100644 --- a/services/brig/src/Brig/Index/Options.hs +++ b/services/brig/src/Brig/Index/Options.hs @@ -146,7 +146,18 @@ elasticServerParser = restrictedElasticSettingsParser :: Parser ElasticSettings restrictedElasticSettingsParser = do server <- elasticServerParser - pure $ localElasticSettings & esServer .~ server + prefix <- + strOption + ( long "elasticsearch-index-prefix" + <> metavar "PREFIX" + <> help "Elasticsearch Index Prefix. The actual index name will be PREFIX_test." + <> value "directory" + <> showDefault + ) + pure $ + localElasticSettings + & esServer .~ server + & esIndex .~ ES.IndexName (prefix <> "_test") indexNameParser :: Parser ES.IndexName indexNameParser = From a1f1c24e9abaed761644fe0693edef875adfc875 Mon Sep 17 00:00:00 2001 From: Paolo Capriotti Date: Thu, 9 Feb 2023 14:02:15 +0100 Subject: [PATCH 29/29] Use alternate index for the second backend --- services/run-services | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/services/run-services b/services/run-services index a91cc4971c..9fdbf894e6 100755 --- a/services/run-services +++ b/services/run-services @@ -127,9 +127,9 @@ class Instance: except urllib.error.URLError: return False - def spawn(self, service_map, environment, keyspace, domain, backend_name): + def spawn(self, service_map, environment, suffix, domain, backend_name): try: - config_file = self.modified_config_file(service_map, keyspace, domain) + config_file = self.modified_config_file(service_map, suffix, domain) sub = self.service.spawn(config_file, environment) t = threading.Thread(target=lambda: color_output(sub, self.service, backend_name)) t.start() @@ -137,7 +137,7 @@ class Instance: except Exception as e: return Instance(self.service, self.port, exception=e) - def modified_config_file(self, service_map, keyspace, domain): + def modified_config_file(self, service_map, suffix, domain): """Overwrite port configuration on this service using the provided service_map. @@ -155,7 +155,11 @@ class Instance: # set cassandra keyspace if 'cassandra' in data: - data['cassandra']['keyspace'] = f"{self.service.name}_{keyspace}" + data['cassandra']['keyspace'] = f"{self.service.name}_test{suffix}" + + # set elasticseach index + if 'elasticsearch' in data: + data['elasticsearch']['index'] = f"directory{suffix}_test" # set federation domain if 'optSettings' in data: @@ -183,10 +187,10 @@ class Instance: data[self.service.name]['port'] = self.port class DummyInstance(Instance): - def spawn(self, service_map, environment, keyspace, domain, backend_name): + def spawn(self, service_map, environment, suffix, domain, backend_name): return self - def modified_config_file(self, service_map, keyspace, domain): + def modified_config_file(self, service_map, suffix, domain): return "" def check_status(self): @@ -209,7 +213,7 @@ class NginzInstance(Instance): self.fed_port = fed_port super().__init__(NGINZ, local_port) - def modified_config_file(self, service_map, keyspace, domain): + def modified_config_file(self, service_map, suffix, domain): # Create a whole temporary directory and copy all nginx's config files. # This is necessary because nginx assumes local imports are relative to # the location of the main configuration file. @@ -305,13 +309,13 @@ def cleanup_instances(instances): instance.process.send_signal(signal.SIGKILl) instance.thread.join() -def start_backend(services, keyspace, domain, backend_name): +def start_backend(services, suffix, domain, backend_name): # build a service map by choosing an arbitrary instance of each service service_map = dict((s.service, s.port) for s in services) instances = set() for blueprint in services: - instances.add(blueprint.spawn(service_map, environment, keyspace, domain, backend_name)) + instances.add(blueprint.spawn(service_map, environment, suffix, domain, backend_name)) failed_instances = [instance for instance in instances if instance.exception is not None] @@ -414,9 +418,9 @@ if __name__ == '__main__': try: instances = set() - instances |= start_backend(backend_a, "test", "example.com", "A") + instances |= start_backend(backend_a, "", "example.com", "A") if ENABLE_FEDERATION: - instances |= start_backend(backend_b, "test2", "b.example.com", "B") + instances |= start_backend(backend_b, "2", "b.example.com", "B") # run main script or just wait forever if len(sys.argv) == 1: