Extended HSET benchmarks to include larger field count (#272)

* Included LPOS benchmarks
* Added LINDEX, LINSERT and LREM benchmarks
* Extended HSET benchmarks to include larger field count
* Included 24.04 OS checks
* SPEC fields validation fix
redis · Sep 20, 2024 · ad690c3 · ad690c3
1 parent caebf3e
commit ad690c3
Show file tree

Hide file tree

Showing 17 changed files with 487 additions and 27 deletions.
diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml
@@ -11,6 +11,7 @@ jobs:
     strategy:
       matrix:
         python-version: [ '3.10', '3.11', '3.12' ]
+        os: [ "ubuntu-latest", "ubuntu-24.04" ]
       fail-fast: false
     env:
       ACTIONS_ALLOW_UNSECURE_COMMANDS: true

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "redis-benchmarks-specification"
-version = "0.1.235"
+version = "0.1.240"
 description = "The Redis benchmarks specification describes the cross-language/tools requirements and expectations to foster performance and observability standards around redis related technologies. Members from both industry and academia, including organizations and individuals are encouraged to contribute."
 authors = ["filipecosta90 <[email protected]>","Redis Performance Group <[email protected]>"]
 readme = "Readme.md"

diff --git a/redis_benchmarks_specification/__cli__/args.py b/redis_benchmarks_specification/__cli__/args.py
@@ -5,7 +5,7 @@
 #
 import datetime
 import os
-
+from distutils.util import strtobool
 from redis_benchmarks_specification.__common__.env import (
     GH_REDIS_SERVER_HOST,
     GH_TOKEN,
@@ -71,6 +71,12 @@ def spec_cli_args(parser):
         action="store_true",
         help="Include modules statistics on commandstats.",
     )
+    parser.add_argument(
+        "--use-git-timestamp",
+        type=lambda x: bool(strtobool(x)),
+        default=True,
+        help="Use git timestamp",
+    )
     parser.add_argument("--github_token", type=str, default=PERFORMANCE_GH_TOKEN)
     parser.add_argument("--pull-request", type=str, default=None, nargs="?", const="")
     parser.add_argument(

diff --git a/redis_benchmarks_specification/__cli__/cli.py b/redis_benchmarks_specification/__cli__/cli.py
@@ -445,6 +445,15 @@ def trigger_tests_cli_command_logic(args, project_name, project_version):
                 git_branch = cdict["git_branch"]
             commit_datetime = cdict["commit_datetime"]
             commit_summary = cdict["commit_summary"]
+            reply_fields = {}
+            use_git_timestamp = args.use_git_timestamp
+            if use_git_timestamp is False:
+                reply_fields["use_git_timestamp"] = str(use_git_timestamp)
+
+            logging.info(
+                f"Setting use use_git_timestamp={use_git_timestamp}. ({args.use_git_timestamp})"
+            )
+
             if result is True:
                 stream_id = "n/a"
                 if args.dry_run is False:
@@ -455,7 +464,7 @@ def trigger_tests_cli_command_logic(args, project_name, project_version):
                     ) = request_build_from_commit_info(
                         conn,
                         commit_dict,
-                        {},
+                        reply_fields,
                         binary_key,
                         binary_value,
                         REDIS_BINS_EXPIRE_SECS,

diff --git a/redis_benchmarks_specification/__common__/github.py b/redis_benchmarks_specification/__common__/github.py
@@ -107,32 +107,37 @@ def check_github_available_and_actionable(
         logging.info("Detected github token")
         g = Github(github_token)
         if pull_request is not None and pull_request != "":
-            pull_request_n = int(pull_request)
-            github_pr = (
-                g.get_user(tf_github_org)
-                .get_repo(tf_github_repo)
-                .get_issue(pull_request_n)
-            )
-            comments = github_pr.get_comments()
-            pr_link = github_pr.html_url
-            logging.info("Working on github PR already: {}".format(pr_link))
-            is_actionable_pr = True
-            contains_regression_comment, pos = fn(comments)
-            if contains_regression_comment:
-                regression_comment = comments[pos]
-                old_regression_comment_body = regression_comment.body
-                logging.info(
-                    "Already contains PR comment. Link: {}".format(
-                        regression_comment.html_url
+            try:
+                pull_request_n = int(pull_request)
+                github_pr = (
+                    g.get_user(tf_github_org)
+                    .get_repo(tf_github_repo)
+                    .get_issue(pull_request_n)
+                )
+                comments = github_pr.get_comments()
+                pr_link = github_pr.html_url
+                logging.info("Working on github PR already: {}".format(pr_link))
+                is_actionable_pr = True
+                contains_regression_comment, pos = fn(comments)
+                if contains_regression_comment:
+                    regression_comment = comments[pos]
+                    old_regression_comment_body = regression_comment.body
+                    logging.info(
+                        "Already contains PR comment. Link: {}".format(
+                            regression_comment.html_url
+                        )
                     )
+                    if verbose:
+                        logging.info("Printing old PR comment:")
+                        print("".join(["-" for x in range(1, 80)]))
+                        print(regression_comment.body)
+                        print("".join(["-" for x in range(1, 80)]))
+                else:
+                    logging.info("Does not contain PR comment")
+            except Exception as e:
+                logging.error(
+                    f"an error occured when checking github info. {e.__str__()}. proceeding..."
                 )
-                if verbose:
-                    logging.info("Printing old PR comment:")
-                    print("".join(["-" for x in range(1, 80)]))
-                    print(regression_comment.body)
-                    print("".join(["-" for x in range(1, 80)]))
-            else:
-                logging.info("Does not contain PR comment")
             logging.info(
                 f"contains_regression_comment: {contains_regression_comment}, is_actionable_pr: {is_actionable_pr}, pr_link: {pr_link}"
             )

diff --git a/redis_benchmarks_specification/__compare__/compare.py b/redis_benchmarks_specification/__compare__/compare.py
@@ -664,6 +664,10 @@ def compute_regression_table(
         total_comparison_points,
         regressions_list,
         improvements_list,
+        unstable_list,
+        baseline_only_list,
+        comparison_only_list,
+        no_datapoints_list,
     ) = from_rts_to_regression_table(
         baseline_deployment_name,
         comparison_deployment_name,
@@ -708,6 +712,29 @@ def compute_regression_table(
         baseline_deployment_name,
     )
 
+    if total_unstable > 0:
+        old_stdout = sys.stdout
+        sys.stdout = mystdout = StringIO()
+        table_output += "#### Unstable Table\n\n"
+        writer_regressions = MarkdownTableWriter(
+            table_name="",
+            headers=[
+                "Test Case",
+                f"Baseline {baseline_github_org}/{baseline_github_repo} {baseline_str} (median obs. +- std.dev)",
+                f"Comparison {comparison_github_org}/{comparison_github_repo} {comparison_str} (median obs. +- std.dev)",
+                "% change ({})".format(metric_mode),
+                "Note",
+            ],
+            value_matrix=table_unstable,
+        )
+        writer_regressions.dump(mystdout, False)
+        table_output += mystdout.getvalue()
+        table_output += "\n\n"
+        test_names_str = "|".join([l[0] for l in unstable_list])
+        table_output += f"Unstable test regexp names: {test_names_str}\n\n"
+        mystdout.close()
+        sys.stdout = old_stdout
+
     if total_regressions > 0:
         old_stdout = sys.stdout
         sys.stdout = mystdout = StringIO()
@@ -774,6 +801,27 @@ def compute_regression_table(
     sys.stdout = old_stdout
     table_output += mystdout.getvalue()
     table_output += "\n</details>\n"
+    len_baseline_only_list = len(baseline_only_list)
+    if len_baseline_only_list > 0:
+        table_output += f"\n  WARNING: There were {len_baseline_only_list} benchmarks with datapoints only on baseline.\n\n"
+        baseline_only_test_names_str = "|".join([l[0] for l in baseline_only_list])
+        table_output += (
+            f"  Baseline only test regexp names: {baseline_only_test_names_str}\n\n"
+        )
+    len_comparison_only_list = len(comparison_only_list)
+    if len_comparison_only_list > 0:
+        table_output += f"\n  WARNING: There were {len_comparison_only_list} benchmarks with datapoints only on comparison.\n\n"
+        comparison_only_test_names_str = "|".join([l[0] for l in comparison_only_list])
+        table_output += (
+            f"  Comparison only test regexp names: {comparison_only_test_names_str}\n\n"
+        )
+    len_no_datapoints = len(no_datapoints_list)
+    if len_no_datapoints > 0:
+        table_output += f"\n  WARNING: There were {len_no_datapoints} benchmarks with NO datapoints for both baseline and comparison.\n\n"
+        no_datapoints_test_names_str = "|".join([l[0] for l in no_datapoints_list])
+        table_output += (
+            f"  NO DATAPOINTS test regexp names: {no_datapoints_test_names_str}\n\n"
+        )
 
     return (
         detected_regressions,
@@ -967,6 +1015,10 @@ def from_rts_to_regression_table(
     progress = tqdm(unit="benchmark time-series", total=len(test_names))
     regressions_list = []
     improvements_list = []
+    unstable_list = []
+    baseline_only_list = []
+    comparison_only_list = []
+    no_datapoints_list = []
     for test_name in test_names:
         compare_version = "main"
         github_link = "https://github.com/redis/redis-benchmarks-specification/blob"
@@ -1110,6 +1162,17 @@ def from_rts_to_regression_table(
             logging.error("Detected a ZeroDivisionError. {}".format(e.__str__()))
             pass
         unstable = False
+
+        if baseline_v != "N/A" and comparison_v == "N/A":
+            logging.warning(
+                "Baseline contains datapoints but comparison not for test: {test_name}"
+            )
+            baseline_only_list.append(test_name)
+        if comparison_v != "N/A" and baseline_v == "N/A":
+            logging.warning(
+                "Comparison contains datapoints but baseline not for test: {test_name}"
+            )
+            comparison_only_list.append(test_name)
         if (
             baseline_v != "N/A"
             and comparison_pct_change != "N/A"
@@ -1119,6 +1182,7 @@ def from_rts_to_regression_table(
             if comparison_pct_change > 10.0 or baseline_pct_change > 10.0:
                 note = "UNSTABLE (very high variance)"
                 unstable = True
+                unstable_list.append([test_name, "n/a"])
 
             baseline_v_str = prepare_value_str(
                 baseline_pct_change, baseline_v, baseline_values, simplify_table
@@ -1212,6 +1276,21 @@ def from_rts_to_regression_table(
             if should_add_line:
                 total_comparison_points = total_comparison_points + 1
                 table_full.append(line)
+        else:
+            logging.warning(
+                "There were no datapoints both for baseline and comparison for test: {test_name}"
+            )
+            no_datapoints_list.append(test_name)
+    logging.warning(
+        f"There is a total of {len(no_datapoints_list)} tests without datapoints for baseline AND comparison"
+    )
+    logging.info(
+        f"There is a total of {len(comparison_only_list)} tests without datapoints for baseline"
+    )
+    logging.info(
+        f"There is a total of {len(baseline_only_list)} tests without datapoints for comparison"
+    )
+    logging.info(f"There is a total of {len(unstable_list)} UNSTABLE tests")
     return (
         detected_regressions,
         table_full,
@@ -1226,6 +1305,10 @@ def from_rts_to_regression_table(
         total_comparison_points,
         regressions_list,
         improvements_list,
+        unstable_list,
+        baseline_only_list,
+        comparison_only_list,
+        no_datapoints_list,
     )
 
 

diff --git a/...fication/test-suites/memtier_benchmark-100Kkeys-load-hash-50-fields-with-1000B-values.yml b/...fication/test-suites/memtier_benchmark-100Kkeys-load-hash-50-fields-with-1000B-values.yml
@@ -0,0 +1,30 @@
+version: 0.4
+name: memtier_benchmark-100Kkeys-load-hash-50-fields-with-1000B-values
+description: Runs memtier_benchmark, for a keyspace length of 100K keys loading HASHES with 50 fields each. Each field value has a data size of 1000 Bytes.
+dbconfig:
+  configuration-parameters:
+    save: '""'
+  check:
+    keyspacelen: 0
+  resources:
+    requests:
+      memory: 6g
+tested-groups:
+- hash
+tested-commands:
+- hset
+redis-topologies:
+- oss-standalone
+build-variants:
+- gcc:8.5.0-amd64-debian-buster-default
+- dockerhub
+clientconfig:
+  run_image: redislabs/memtier_benchmark:edge
+  tool: memtier_benchmark
+  arguments: --test-time 120 --distinct-client-seed "--data-size" "1000" --command "HSET __key__ field:1 __data__ field:2 __data__ field:3 __data__ field:4 __data__ field:5 __data__ field:6 __data__ field:7 __data__ field:8 __data__ field:9 __data__ field:10 __data__ field:11 __data__ field:12 __data__ field:13 __data__ field:14 __data__ field:15 __data__ field:16 __data__ field:17 __data__ field:18 __data__ field:19 __data__ field:20 __data__ field:21 __data__ field:22 __data__ field:23 __data__ field:24 __data__ field:25 __data__ field:26 __data__ field:27 __data__ field:28 __data__ field:29 __data__ field:30 __data__ field:31 __data__ field:32 __data__ field:33 __data__ field:34 __data__ field:35 __data__ field:36 __data__ field:37 __data__ field:38 __data__ field:39 __data__ field:40 __data__ field:41 __data__ field:42 __data__ field:43 __data__ field:44 __data__ field:45 __data__ field:46 __data__ field:47 __data__ field:48 __data__ field:49 __data__ field:50 __data__" --command-key-pattern="R" --key-minimum=1 --key-maximum 100000 -c 50 -t 4 --hide-histogram
+  resources:
+    requests:
+      cpus: '4'
+      memory: 2g
+
+priority: 5
diff --git a/...ification/test-suites/memtier_benchmark-100Kkeys-load-hash-50-fields-with-100B-values.yml b/...ification/test-suites/memtier_benchmark-100Kkeys-load-hash-50-fields-with-100B-values.yml
@@ -0,0 +1,30 @@
+version: 0.4
+name: memtier_benchmark-100Kkeys-load-hash-50-fields-with-100B-values
+description: Runs memtier_benchmark, for a keyspace length of 100K keys loading HASHES with 50 fields each. Each field value has a data size of 100 Bytes.
+dbconfig:
+  configuration-parameters:
+    save: '""'
+  check:
+    keyspacelen: 0
+  resources:
+    requests:
+      memory: 6g
+tested-groups:
+- hash
+tested-commands:
+- hset
+redis-topologies:
+- oss-standalone
+build-variants:
+- gcc:8.5.0-amd64-debian-buster-default
+- dockerhub
+clientconfig:
+  run_image: redislabs/memtier_benchmark:edge
+  tool: memtier_benchmark
+  arguments: --test-time 120 --distinct-client-seed "--data-size" "100" --command "HSET __key__ field:1 __data__ field:2 __data__ field:3 __data__ field:4 __data__ field:5 __data__ field:6 __data__ field:7 __data__ field:8 __data__ field:9 __data__ field:10 __data__ field:11 __data__ field:12 __data__ field:13 __data__ field:14 __data__ field:15 __data__ field:16 __data__ field:17 __data__ field:18 __data__ field:19 __data__ field:20 __data__ field:21 __data__ field:22 __data__ field:23 __data__ field:24 __data__ field:25 __data__ field:26 __data__ field:27 __data__ field:28 __data__ field:29 __data__ field:30 __data__ field:31 __data__ field:32 __data__ field:33 __data__ field:34 __data__ field:35 __data__ field:36 __data__ field:37 __data__ field:38 __data__ field:39 __data__ field:40 __data__ field:41 __data__ field:42 __data__ field:43 __data__ field:44 __data__ field:45 __data__ field:46 __data__ field:47 __data__ field:48 __data__ field:49 __data__ field:50 __data__" --command-key-pattern="R" --key-minimum=1 --key-maximum 100000 -c 50 -t 4 --hide-histogram
+  resources:
+    requests:
+      cpus: '4'
+      memory: 2g
+
+priority: 5
diff --git a/...cification/test-suites/memtier_benchmark-100Kkeys-load-hash-50-fields-with-10B-values.yml b/...cification/test-suites/memtier_benchmark-100Kkeys-load-hash-50-fields-with-10B-values.yml
@@ -0,0 +1,30 @@
+version: 0.4
+name: memtier_benchmark-100Kkeys-load-hash-50-fields-with-10B-values
+description: Runs memtier_benchmark, for a keyspace length of 100K keys loading HASHES with 50 fields each. Each field value has a data size of 10 Bytes.
+dbconfig:
+  configuration-parameters:
+    save: '""'
+  check:
+    keyspacelen: 0
+  resources:
+    requests:
+      memory: 6g
+tested-groups:
+- hash
+tested-commands:
+- hset
+redis-topologies:
+- oss-standalone
+build-variants:
+- gcc:8.5.0-amd64-debian-buster-default
+- dockerhub
+clientconfig:
+  run_image: redislabs/memtier_benchmark:edge
+  tool: memtier_benchmark
+  arguments: --test-time 120 --distinct-client-seed "--data-size" "10" --command "HSET __key__ field:1 __data__ field:2 __data__ field:3 __data__ field:4 __data__ field:5 __data__ field:6 __data__ field:7 __data__ field:8 __data__ field:9 __data__ field:10 __data__ field:11 __data__ field:12 __data__ field:13 __data__ field:14 __data__ field:15 __data__ field:16 __data__ field:17 __data__ field:18 __data__ field:19 __data__ field:20 __data__ field:21 __data__ field:22 __data__ field:23 __data__ field:24 __data__ field:25 __data__ field:26 __data__ field:27 __data__ field:28 __data__ field:29 __data__ field:30 __data__ field:31 __data__ field:32 __data__ field:33 __data__ field:34 __data__ field:35 __data__ field:36 __data__ field:37 __data__ field:38 __data__ field:39 __data__ field:40 __data__ field:41 __data__ field:42 __data__ field:43 __data__ field:44 __data__ field:45 __data__ field:46 __data__ field:47 __data__ field:48 __data__ field:49 __data__ field:50 __data__" --command-key-pattern="R" --key-minimum=1 --key-maximum 100000 -c 50 -t 4 --hide-histogram
+  resources:
+    requests:
+      cpus: '4'
+      memory: 2g
+
+priority: 5
diff --git a/...fication/test-suites/memtier_benchmark-10Kkeys-load-hash-50-fields-with-10000B-values.yml b/...fication/test-suites/memtier_benchmark-10Kkeys-load-hash-50-fields-with-10000B-values.yml
@@ -0,0 +1,30 @@
+version: 0.4
+name: memtier_benchmark-10Kkeys-load-hash-50-fields-with-10000B-values
+description: Runs memtier_benchmark, for a keyspace length of 10K keys loading HASHES with 50 fields each. Each field value has a data size of 10000 Bytes.
+dbconfig:
+  configuration-parameters:
+    save: '""'
+  check:
+    keyspacelen: 0
+  resources:
+    requests:
+      memory: 6g
+tested-groups:
+- hash
+tested-commands:
+- hset
+redis-topologies:
+- oss-standalone
+build-variants:
+- gcc:8.5.0-amd64-debian-buster-default
+- dockerhub
+clientconfig:
+  run_image: redislabs/memtier_benchmark:edge
+  tool: memtier_benchmark
+  arguments: --test-time 120 --distinct-client-seed "--data-size" "10000" --command "HSET __key__ field:1 __data__ field:2 __data__ field:3 __data__ field:4 __data__ field:5 __data__ field:6 __data__ field:7 __data__ field:8 __data__ field:9 __data__ field:10 __data__ field:11 __data__ field:12 __data__ field:13 __data__ field:14 __data__ field:15 __data__ field:16 __data__ field:17 __data__ field:18 __data__ field:19 __data__ field:20 __data__ field:21 __data__ field:22 __data__ field:23 __data__ field:24 __data__ field:25 __data__ field:26 __data__ field:27 __data__ field:28 __data__ field:29 __data__ field:30 __data__ field:31 __data__ field:32 __data__ field:33 __data__ field:34 __data__ field:35 __data__ field:36 __data__ field:37 __data__ field:38 __data__ field:39 __data__ field:40 __data__ field:41 __data__ field:42 __data__ field:43 __data__ field:44 __data__ field:45 __data__ field:46 __data__ field:47 __data__ field:48 __data__ field:49 __data__ field:50 __data__" --command-key-pattern="R" --key-minimum=1 --key-maximum 10000 -c 50 -t 4 --hide-histogram
+  resources:
+    requests:
+      cpus: '4'
+      memory: 2g
+
+priority: 5