aws · dougch · Jan 10, 2023 · Dec 9, 2022 · Dec 12, 2022 · Dec 12, 2022
diff --git a/tests/integrationv2/README.md b/tests/integrationv2/README.md
@@ -157,3 +157,47 @@ An example of how to test that the server and the client can send and receive ap
 An error similar to this is caused by a runtime error in a test. In `tox.ini` change `-n8` to `-n0` to
 see the actual error causing the OSError.
 
+
+# Criterion
+
+### Why
+
+We wanted to use the rust criterion project to benchmark s2n-tls, without re-writing all the integration tests.
+To accomplish this, we created some criterion benchmarks that use s2nc and s2nd, and a new provider, CriterionS2N, in the python testing framework.
+
+### Prerequisites
+
+Normally, you'd run criterion with `cargo bench --bench <name>`, but cargo does some checks to see if it needs to rebuild
+the benchmark and other housekeeping that slows things down a bit.  Running `cargo bench --no-run` is the benchmark equivalent to `cargo build` and will produce a binary executable.
+
+The CI will run `make install` and `make -C ./bindings/rust` to create and install the s2nc/d binaries in a system-wide location, then build the cargo criterion binary handlers for s2nc and s2nd.
+
+### What is happening inside the CriterionS2N provider?
+
+The CriterionS2N provider modifies the test command being run by pytest to be the criterion benchmark binary and moves the s2nc/d command line arguments into an environment variable.  The binary created by `cargo bench --no-run` has a unique name and must be searched for, which the CriterionS2N provider finds and replaces in the final testing command.
+The arguments to s2nc/d are moved to the environment variables `S2NC_ARGS` or `S2ND_ARGS`, which are read by the rust benchmark when spawning s2nc/d as subprocesses.
+
+The name of the criterion report is based on the test name; constructed from the s2nc/d command line arguments.
+
+Criterion needs 2 passes to generate a report showing changes between two runs. The first pass establishes a base line, the second run a delta dataset, and a report.
+These modes have slightly different command line arguments to criterion and are controlled with the environment variable S2N_USE_CRITERION in the provider.
+
+### Running locally
+
+The Criterion CodeBuild scripts can be used to run these locally, by setting LOCAL_TESTING the s3/github interactions are disabled. Tooling needed includes python3.9, rust, and write permissions to `/usr/local/bin|lib` (or use sudo) - in addition to the traditional C build tooling.
+
+```
+export LOCAL_TESTING=true
+INTEGV2_TEST=test_well_known_endpoints ./codebuild/bin/criterion_baseline.sh
+INTEGV2_TEST=test_well_known_endpoints ./codebuild/bin/criterion_delta.sh
+```
+
+The resulting reports are viewable via `tests/integrationv2/target/criterion/report/index.html`
+
+
+
+## Troubleshooting CriterionS2N
+
+The most direct trouble-shooting is done using the [interactive troubleshooting CodeBuild](https://docs.aws.amazon.com/codebuild/latest/userguide/session-manager.html#ssm-pause-build) `codebuild-break` line in the buildspec. Put the break right before the main build step and run interactivly.
+
+As mentioned above, in order to get more output from the tests, set the `-n` or `XDIST_WORKER` environment variable to 0 and add a -v to the pytest command line in tox.ini.
diff --git a/tests/integrationv2/conftest.py b/tests/integrationv2/conftest.py
@@ -1,4 +1,4 @@
-from global_flags import set_flag, S2N_PROVIDER_VERSION, S2N_FIPS_MODE, S2N_NO_PQ
+from global_flags import set_flag, S2N_PROVIDER_VERSION, S2N_FIPS_MODE, S2N_NO_PQ, S2N_USE_CRITERION
 
 
 def pytest_addoption(parser):
@@ -8,6 +8,8 @@ def pytest_addoption(parser):
                      default=False, type=int, help="S2N is running in FIPS mode")
     parser.addoption("--no-pq", action="store", dest="no-pq",
                      default=False, type=int, help="Turn off PQ support")
+    parser.addoption("--provider-criterion", action="store", dest="provider-criterion",
+                     default=False, type=str, help="Use Criterion provider in one of 3 modes: [off,delta,baseline,report]")
 
 
 def pytest_configure(config):
@@ -27,6 +29,7 @@ def pytest_configure(config):
         set_flag(S2N_FIPS_MODE, True)
 
     set_flag(S2N_PROVIDER_VERSION, config.getoption('provider-version', None))
+    set_flag(S2N_USE_CRITERION, config.getoption('provider-criterion', None))
 
 
 def pytest_collection_modifyitems(config, items):

diff --git a/tests/integrationv2/fixtures.py b/tests/integrationv2/fixtures.py
@@ -2,9 +2,10 @@
 import pytest
 import subprocess
 
+from global_flags import get_flag, S2N_USE_CRITERION
 from processes import ManagedProcess
-from providers import Provider
-from common import ProviderOptions 
+from providers import Provider, CriterionS2N, S2N
+from common import ProviderOptions
 
 
 @pytest.fixture
@@ -21,6 +22,11 @@ def managed_process():
 
     def _fn(provider_class: Provider, options: ProviderOptions, timeout=5, send_marker=None, close_marker=None,
             expect_stderr=None, kill_marker=None, send_with_newline=None):
+        if provider_class == S2N and get_flag(S2N_USE_CRITERION) != "off":
+            provider_class = CriterionS2N
+            # This comes from the number of iterations specific in the rust benchmark handler(s).
+            # currently set at 10 iterations, so give us 10x more time.
+            timeout = timeout * 10
         provider = provider_class(options)
         cmd_line = provider.get_cmd_line()
         # The process will default to send markers in the providers.py file

diff --git a/tests/integrationv2/global_flags.py b/tests/integrationv2/global_flags.py
@@ -12,6 +12,9 @@
 # (set from the S2N_LIBCRYPTO env var, which is how the original integration test works)
 S2N_PROVIDER_VERSION = 's2n_provider_version'
 
+# From S2N_USE_CRITERION env var.
+S2N_USE_CRITERION = 's2n_use_criterion'
+
 _flags = {}
 
 

diff --git a/tests/integrationv2/providers.py b/tests/integrationv2/providers.py
@@ -1,8 +1,11 @@
+import os
 import pytest
 import threading
 
 from common import ProviderOptions, Ciphers, Curves, Protocols, Signatures
 from global_flags import get_flag, S2N_PROVIDER_VERSION, S2N_FIPS_MODE
+from global_flags import S2N_USE_CRITERION
+from stat import *
 
 
 TLS_13_LIBCRYPTOS = {
@@ -308,8 +311,95 @@ def setup_server(self):
         return cmd_line
 
 
-class OpenSSL(Provider):
+class CriterionS2N(S2N):
+    """
+    Wrap the S2N provider in criterion-rust
+    """
+    criterion_off = 'off'
+    criterion_delta = 'delta'
+    criterion_baseline = 'baseline'
+    # Figure out what mode to run in: baseline, delta or report
+    criterion_mode = get_flag(S2N_USE_CRITERION)
+
+    def _find_s2n_benchmark(self, pattern):
+        # Use executable bit to find the correct file.
+        result = find_files(pattern, root_dir=self.cargo_root, modes=['0o775','0o755'])
+        if len(result) != 1:
+            raise FileNotFoundError(
+                f"Exactly one s2n criterion benchmark not found {result} with expected file mask.")
+        else:
+            return result[0]
+
+    def _find_cargo(self):
+        # return a path to the highest level dir containing Cargo.toml
+        shortest = None
+        for file in find_files("Cargo.toml", root_dir="../.."):
+            if shortest is None:
+                shortest = file
+            else:
+                if len(file) < len(shortest):
+                    shortest = file
+        if shortest is None:
+            raise FileNotFoundError("Unable to find Cargo.toml")
+        # Return the path, minus Cargo.toml
+        return os.path.dirname(shortest)
+
+    def _cargo_bench(self):
+        """
+        Find the handlers
+        """
+        self.s2nc_bench = self._find_s2n_benchmark("s2nc-")
+        self.s2nd_bench = self._find_s2n_benchmark("s2nd-")
+
+    def __init__(self, options: ProviderOptions):
+        super().__init__(options)
+        # Set cargo root
+        self.cargo_root = self._find_cargo()
+
+        # Find the criterion binaries
+        self._cargo_bench()
+
+        # strip off the s2nc/d at the front because criterion
+        if 's2nc' in self.cmd_line[0] or 's2nd' in self.cmd_line[0]:
+            self.cmd_line = self.cmd_line[1:]
+
+        # strip off the s2nc -e argument, criterion handler isn't sending any STDIN characters,
+        # and makes it look like s2nc is hanging.
+        if '-e' in self.cmd_line:
+            self.cmd_line.remove('-e')
+            print(f"***** cmd_line is now {self.cmd_line}")
 
+        # Copy the command arguments to an environment variable for the harness to read.
+        if self.options.mode == Provider.ServerMode:
+            self.options.env_overrides.update(
+                {'S2ND_ARGS': ' '.join(self.cmd_line)})
+            self.capture_server_args()
+        elif self.options.mode == Provider.ClientMode:
+            self.options.env_overrides.update(
+                {'S2NC_ARGS': ' '.join(self.cmd_line)})
+            if self.criterion_mode == CriterionS2N.criterion_baseline:
+                self.capture_client_args_baseline()
+            if self.criterion_mode == CriterionS2N.criterion_delta:
+                self.capture_client_args_delta()
+
+    def capture_server_args(self):
+        self.cmd_line = [self.s2nd_bench, "--bench",
+                        "s2nd", "--save-baseline", "main"]
+
+    # Saves baseline data with the tag "main"
+    # see https://bheisler.github.io/criterion.rs/book/user_guide/command_line_options.html
+    def capture_client_args_baseline(self):
+        self.cmd_line = [self.s2nc_bench, "--bench",
+                         "s2nc", "--save-baseline", "main"]
+
+    # "By default, Criterion.rs will compare the measurements against the previous run"
+    # This run is stored with the tag "new"
+    # https://bheisler.github.io/criterion.rs/book/user_guide/command_line_options.html
+    def capture_client_args_delta(self):
+        self.cmd_line = [self.s2nc_bench, "--bench", "s2nc", "--plotting-backend", "plotters","--baseline","main"]
+
+
+class OpenSSL(Provider):
     _version = get_flag(S2N_PROVIDER_VERSION)
 
     def __init__(self, options: ProviderOptions):
@@ -807,3 +897,30 @@ def supports_cipher(cls, cipher, with_curve=None):
     @classmethod
     def supports_signature(cls, signature):
         return GnuTLS.sigalg_to_priority_str(signature) is not None
+
+
+
+def find_files(file_glob, root_dir=".", modes=[]):
+    """
+    find util in python form.
+    file_glob: a snippet of the filename, e.g. ".py"
+    root_dir: starting point for search
+    mode is an octal representation of owner/group/other, e.g.: '0o644'
+    """
+    result = []
+    for root, dirs, files in os.walk(root_dir):
+        for file in files:
+            if file_glob in file:
+                full_name = os.path.abspath(os.path.join(root, file))
+                if len(modes) != 0:
+                    try:
+                        stat = oct(S_IMODE(os.stat(full_name).st_mode))
+                        if stat in modes:
+                            result.append(full_name)
+                    except FileNotFoundError:
+                        # symlinks
+                        pass
+                else:
+                    result.append(full_name)
+
+    return result
diff --git a/tests/integrationv2/tox.ini b/tests/integrationv2/tox.ini
@@ -5,16 +5,19 @@ skipsdist = True
 [testenv]
 # install pytest in the virtualenv where commands will be executed
 setenv = S2N_INTEG_TEST = 1
-passenv = DYLD_LIBRARY_PATH LD_LIBRARY_PATH OQS_OPENSSL_1_1_1_INSTALL_DIR
+passenv = DYLD_LIBRARY_PATH LD_LIBRARY_PATH OQS_OPENSSL_1_1_1_INSTALL_DIR HOME TOX_TEST_NAME
+ignore_errors=False
 deps =
     pep8
     pytest==5.3.5
     pytest-xdist==1.34.0
     sslyze==5.0.2
     pytest-rerunfailures
 commands =
-    pytest -n 2 --maxfail=1 --reruns=2 --cache-clear -rpfsq \
+    pytest -x -n={env:XDIST_WORKERS:"2"} --maxfail=1 --reruns=2 --cache-clear -rpfsq \
+        -o log_cli=true --log-cli-level=INFO \
         --provider-version={env:S2N_LIBCRYPTO} \
+        --provider-criterion={env:S2N_USE_CRITERION:"off"} \
         --fips-mode={env:S2N_TEST_IN_FIPS_MODE:"0"} \
         --no-pq={env:S2N_NO_PQ:"0"} \
         {env:TOX_TEST_NAME:""}
diff --git a/tests/integrationv2/utils.py b/tests/integrationv2/utils.py
@@ -1,6 +1,6 @@
 from common import Protocols
 from providers import S2N
-from global_flags import get_flag, S2N_FIPS_MODE
+from global_flags import get_flag, S2N_FIPS_MODE, S2N_USE_CRITERION
 
 
 def to_bytes(val):
@@ -63,6 +63,8 @@ def invalid_test_parameters(*args, **kwargs):
     cipher = kwargs.get('cipher')
     curve = kwargs.get('curve')
     signature = kwargs.get('signature')
+    endpoint = kwargs.get('endpoint')
+    criterion_skip_endpoints = ['www.netflix.com']
 
     providers = [provider_ for provider_ in [provider, other_provider] if provider_]
     # Always consider S2N
@@ -80,6 +82,12 @@ def invalid_test_parameters(*args, **kwargs):
         if not provider_.supports_protocol(protocol):
             return True
 
+    # Skip some endpoints for criterion.
+    if endpoint is not None:
+        if provider is S2N and get_flag(S2N_USE_CRITERION) != "off" and endpoint in criterion_skip_endpoints:
+            return True
+
+
     if cipher is not None:
         # If the selected protocol doesn't allow the cipher, don't test
         if protocol is not None: