diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e5fe1d25b..06f6fbfd6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -83,9 +83,6 @@ jobs:
             fi
 
   test:
-    needs: 
-      - filter-pr-changes
-      - determine-revision
     strategy:
       fail-fast: false
       matrix:
@@ -105,8 +102,8 @@ jobs:
           cache-dependency-path: pyproject.toml
       - name: "Install dependencies"
         run: python -m pip install .[develop]
-      - name: "Run tests${{ needs.filter-pr-changes.outputs.track_filter }}${{ needs.determine-revision.outputs.revision }}"
-        run: hatch -v -e unit run test${{ needs.filter-pr-changes.outputs.track_filter }}${{ needs.determine-revision.outputs.revision }}
+      - name: "Run tests"
+        run: hatch -v -e unit run test
       - uses: elastic/es-perf-github-status@v2
         if: ${{ failure() && ( github.event_name == 'schedule' || ( github.event_name == 'push' && github.ref_name == env.DEFAULT_BRANCH ) ) }}
         with:
@@ -128,6 +125,9 @@ jobs:
 
     name: rally-tracks-compat ${{ matrix.python-version }}
     steps:
+      - name: Check public IP address  # diagnostic only: lookup is time-boxed and failures are ignored
+        run: curl -4s --max-time 10 ifconfig.me
+        continue-on-error: true
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
@@ -140,15 +140,30 @@ jobs:
           java-version: "21"
       - run: echo "JAVA21_HOME=$JAVA_HOME_21_X64" >> $GITHUB_ENV
       - run: echo "JAVA11_HOME=$JAVA_HOME_11_X64" >> $GITHUB_ENV
+      - name: Free Disk Space
+        continue-on-error: true
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be
+        with:
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: false
+          docker-images: false
+          swap-storage: false
+          tool-cache: false
+      - name: Check disk space before
+        run: df -h
       - name: "Install dependencies"
         run: python -m pip install .[develop]
-      - name: "Run tests${{ needs.filter-pr-changes.outputs.track_filter }}${{ needs.determine-revision.outputs.revision }}"
-        run: hatch -v -e it run test${{ needs.filter-pr-changes.outputs.track_filter }}${{ needs.determine-revision.outputs.revision }}
-        timeout-minutes: 120
+      - name: "Run tests${{ needs.filter-pr-changes.outputs.track_filter }}${{ needs.determine-revision.outputs.revision }} --source-build-release"
+        run: hatch -v -e it run test${{ needs.filter-pr-changes.outputs.track_filter }}${{ needs.determine-revision.outputs.revision }} --source-build-release
+        timeout-minutes: 160
         env:
           # elastic/endpoint fetches assets from GitHub, authenticate to avoid
           # being rate limited
           ASSETS_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Check disk space after
+        run: df -h
       - uses: elastic/es-perf-github-status@v2
         if: ${{ failure() && ( github.event_name == 'schedule' || ( github.event_name == 'push' && github.ref_name == env.DEFAULT_BRANCH ) ) }}
         with:
diff --git a/elastic/Makefile b/elastic/Makefile
index 1d5508e16..f4e0d85aa 100644
--- a/elastic/Makefile
+++ b/elastic/Makefile
@@ -59,7 +59,7 @@ install: venv-create
 	# install pytest for tests
 	. $(VENV_ACTIVATE_FILE); pip3 install pytest==6.2.5 pytest-benchmark==3.2.2
 	# install dependencies for tests
-	. $(VENV_ACTIVATE_FILE); pip3 install geneve==0.0.3 pytest-asyncio==0.18.1 git+https://github.com/elastic/package-assets.git
+	. $(VENV_ACTIVATE_FILE); pip3 install geneve==0.3.0 pytest-asyncio==0.18.1 git+https://github.com/elastic/package-assets.git
 	# install (latest) Rally for smoke tests
 	. $(VENV_ACTIVATE_FILE); pip3 install git+ssh://git@github.com/elastic/rally.git --use-feature=2020-resolver
 
diff --git a/elastic/security/README.md b/elastic/security/README.md
index cc9e6221e..13186ac2d 100644
--- a/elastic/security/README.md
+++ b/elastic/security/README.md
@@ -148,7 +148,7 @@ This challenge executes indexing and querying sequentially. Queries will be issu
 
 ### Generate source events for detection rules (generate-alerts-source-events)
 
-This challenge is a demo usage of [Geneve](https://github.com/elastic/geneve) via the `events-emitter-source` [parameter source](https://github.com/elastic/rally-tracks/blob/master/elastic/security/parameter_sources/events_emitter.py), it generates source events but does not interact with anything else. It's executed as part of the [it/test_security.py](https://github.com/elastic/rally-tracks/blob/master/it/test_security.py) integration tests. Currently, Geneve is pinned to version [v0.2.0](https://github.com/elastic/rally-tracks/blob/master/elastic/security/track.json#L410). This is the only challenge that depends on Geneve and pyyaml (Geneve requires pyyaml).
+This challenge is a demo usage of [Geneve](https://github.com/elastic/geneve) via the `events-emitter-source` [parameter source](https://github.com/elastic/rally-tracks/blob/master/elastic/security/parameter_sources/events_emitter.py); it generates source events but does not interact with anything else. It's executed as part of the [it/test_security.py](https://github.com/elastic/rally-tracks/blob/master/it/test_security.py) integration tests. Currently, Geneve is pinned to version [v0.3.0](https://github.com/elastic/rally-tracks/blob/master/elastic/security/track.json#L410). This is the only challenge that depends on Geneve and pyyaml (Geneve requires pyyaml).
 
 ## Ratios
 
diff --git a/elastic/security/track.json b/elastic/security/track.json
index 1a14aff97..e7f612266 100644
--- a/elastic/security/track.json
+++ b/elastic/security/track.json
@@ -352,7 +352,7 @@
 {% endfor %}
   ],
   "dependencies": [
-    "geneve==0.2.0",
+    "geneve==0.3.0",
     "pyyaml",
     "elastic-transport==8.4.1",
     "elasticsearch==8.6.1"
diff --git a/elastic/shared/parameter_sources/__init__.py b/elastic/shared/parameter_sources/__init__.py
index 909884e44..09ede38ec 100644
--- a/elastic/shared/parameter_sources/__init__.py
+++ b/elastic/shared/parameter_sources/__init__.py
@@ -6,7 +6,7 @@
 DEFAULT_MAX_DATE = "2020-01-01"
 
 # this provides a universal start date for `now` if we are using it as the current time
-now = datetime.utcnow().replace(tzinfo=timezone.utc)
+now = datetime.now(tz=timezone.utc)
 
 
 def utc_now():
diff --git a/elastic/shared/parameter_sources/processed.py b/elastic/shared/parameter_sources/processed.py
index 8c9e96d9c..528b333ce 100644
--- a/elastic/shared/parameter_sources/processed.py
+++ b/elastic/shared/parameter_sources/processed.py
@@ -79,7 +79,7 @@ def __init__(self, track, params, **kwargs):
         self._volume_per_day_gb = convert_to_gib(raw_volume_per_day)
         self.start_time = int(time.perf_counter())
         self._profile = params.get("profile", "fixed_interval")
-        now = datetime.utcnow().replace(tzinfo=timezone.utc)
+        now = datetime.now(tz=timezone.utc)
 
         def utc_now():
             return now
diff --git a/elastic/shared/parameter_sources/workflow_selector.py b/elastic/shared/parameter_sources/workflow_selector.py
index 9d8a149ea..34da6a2ee 100644
--- a/elastic/shared/parameter_sources/workflow_selector.py
+++ b/elastic/shared/parameter_sources/workflow_selector.py
@@ -59,8 +59,8 @@ def __init__(self, track, params, **kwargs):
         self.logger.info("Workflow [%s] is using seed [%s]", self.workflow, self.random_seed)
         self.number_of_tasks = track.selected_challenge_or_default.parameters.get("number-of-workflows")
         # for testing purposes only we allow a configurable now function
-        self._utc_now = kwargs.get("utc_now", datetime.utcnow)
-        self._init_date = self._utc_now().replace(tzinfo=timezone.utc)
+        self._utc_now = kwargs.get("utc_now", lambda: datetime.now(tz=timezone.utc))
+        self._init_date = self._utc_now()
         self._detailed_results = params.get(
             "detailed-results", track.selected_challenge_or_default.parameters.get("detailed-results", False)
         )
@@ -266,7 +266,7 @@ def copy_and_modify_action(self, action):
         else:
             # process fields - use the start_date + the time passed since we started, as the time
             # all dates for the action should be the same
-            query_max_date = self._max_date_start + (self._utc_now().replace(tzinfo=timezone.utc) - self._init_date)
+            query_max_date = self._max_date_start + (self._utc_now() - self._init_date)
 
         for query_handler in self.workflow_handlers[action_id]:
             # scale the duration based on the max if set
diff --git a/elastic/shared/query_handlers/date_histogram.py b/elastic/shared/query_handlers/date_histogram.py
index 80c4351a2..d02d5fedf 100644
--- a/elastic/shared/query_handlers/date_histogram.py
+++ b/elastic/shared/query_handlers/date_histogram.py
@@ -33,8 +33,8 @@ def read_ranges(self):
             self.request_body["time_zone"] = "UTC"
             if "min" in self.extended_bounds and "max" in self.extended_bounds:
                 try:
-                    self.max_bound = datetime.datetime.utcfromtimestamp(int(self.extended_bounds["max"]) / 1000)
-                    self.min_bound = datetime.datetime.utcfromtimestamp(int(self.extended_bounds["min"]) / 1000)
+                    self.max_bound = datetime.datetime.fromtimestamp(int(self.extended_bounds["max"]) / 1000, tz=datetime.timezone.utc)
+                    self.min_bound = datetime.datetime.fromtimestamp(int(self.extended_bounds["min"]) / 1000, tz=datetime.timezone.utc)
                 except ValueError:
                     raise exceptions.TrackConfigError(
                         f"Date Histogram aggregation requires epoch milliseconds for its "
diff --git a/elastic/shared/utils/time.py b/elastic/shared/utils/time.py
index 6af94e844..807ba5b2f 100644
--- a/elastic/shared/utils/time.py
+++ b/elastic/shared/utils/time.py
@@ -77,7 +77,7 @@ def parse_interval(offset: str) -> Optional[timedelta]:
         raise TimeParsingError(f"Invalid offset: {offset}")
 
 
-def parse_date_time(point: str, utcnow: Callable[..., datetime] = datetime.utcnow) -> Optional[datetime]:
+def parse_date_time(point: str, utcnow: Callable[..., datetime] = lambda: datetime.now(tz=timezone.utc)) -> Optional[datetime]:
     now = "now"
     if not point:
         return None
diff --git a/elastic/tests/__init__.py b/elastic/tests/__init__.py
index 8f9ad6667..ca4b17e60 100644
--- a/elastic/tests/__init__.py
+++ b/elastic/tests/__init__.py
@@ -24,7 +24,12 @@ def as_future(result=None, exception=None):
     :param exception: Exceptional result.
     :return: The corresponding future.
     """
-    f = asyncio.Future()
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+    f = loop.create_future()
     if exception and result:
         raise AssertionError("Specify a result or an exception but not both")
     if exception:
diff --git a/elastic/tests/query_handlers/date_histogram_test.py b/elastic/tests/query_handlers/date_histogram_test.py
index 42655fad4..b97f1a373 100644
--- a/elastic/tests/query_handlers/date_histogram_test.py
+++ b/elastic/tests/query_handlers/date_histogram_test.py
@@ -71,7 +71,7 @@ def test_process_contract_bounds_with_min_date():
         "fixed_interval": "1m",
     }
     date_histogram_handler = DateHistogramHandler(date_histogram_agg)
-    min_date = datetime.datetime.utcfromtimestamp(1606911780).replace(tzinfo=datetime.timezone.utc)
+    min_date = datetime.datetime.fromtimestamp(1606911780, tz=datetime.timezone.utc)
     date_histogram_handler.process(DateTimeValues(min_date=min_date, max_date=start, duration=None))
     assert date_histogram_agg["extended_bounds"]["min"] == 1606911780000
     assert date_histogram_agg["extended_bounds"]["max"] == 1606912380000
diff --git a/elastic/tests/query_handlers/range_query_test.py b/elastic/tests/query_handlers/range_query_test.py
index 4ed723456..04a1bd078 100644
--- a/elastic/tests/query_handlers/range_query_test.py
+++ b/elastic/tests/query_handlers/range_query_test.py
@@ -125,7 +125,7 @@ def test_invalid_time_date():
                     "format": "strict_date_optional_time",
                 }
             }
-        ).process(DateTimeValues(min_date=None, max_date=datetime.datetime.utcnow(), duration=None))
+        ).process(DateTimeValues(min_date=None, max_date=datetime.datetime.now(tz=datetime.timezone.utc), duration=None))
     assert rae.value.args[0] == "Invalid time format: 2020-11-30T:16:59.340Z"
 
 
@@ -138,7 +138,7 @@ def test_missing_gte():
                     "format": "strict_date_optional_time",
                 }
             }
-        ).process(DateTimeValues(min_date=None, max_date=datetime.datetime.utcnow(), duration=None))
+        ).process(DateTimeValues(min_date=None, max_date=datetime.datetime.now(tz=datetime.timezone.utc), duration=None))
     assert (
         rae.value.message == 'Range query for date does not have both "gte" or "gt" and '
         "\"lte\" or \"lt\" key - [{'@timestamp': {'lte': '2020-12-01T12:16:59.340Z', "
@@ -155,7 +155,7 @@ def test_missing_lte():
                     "format": "strict_date_optional_time",
                 }
             }
-        ).process(DateTimeValues(min_date=None, max_date=datetime.datetime.utcnow(), duration=None))
+        ).process(DateTimeValues(min_date=None, max_date=datetime.datetime.now(tz=datetime.timezone.utc), duration=None))
     assert (
         rae.value.message == 'Range query for date does not have both "gte" or "gt" and '
         "\"lte\" or \"lt\" key - [{'@timestamp': {'gte': '2020-12-01T12:16:59.340Z', "
@@ -166,7 +166,7 @@ def test_missing_lte():
 def test_pass_through():
     range_query = {"http.status.code": {"gte": 200, "lte": 300}}
     range_query_handler = RangeQueryHandler(range_query)
-    range_query_handler.process(DateTimeValues(min_date=None, max_date=datetime.datetime.utcnow(), duration=None))
+    range_query_handler.process(DateTimeValues(min_date=None, max_date=datetime.datetime.now(tz=datetime.timezone.utc), duration=None))
     assert range_query["http.status.code"]["gte"] == 200
     assert range_query["http.status.code"]["lte"] == 300
 
diff --git a/elastic/tests/utils/time_test.py b/elastic/tests/utils/time_test.py
index a866eaadc..6a9bfd630 100644
--- a/elastic/tests/utils/time_test.py
+++ b/elastic/tests/utils/time_test.py
@@ -138,7 +138,7 @@ def test_generate_new_bounds_preserve_interval():
     upper_bound = parse_date_optional_time("2020-01-03T12:00:00.000Z")
     lower_bound = parse_date_optional_time("2020-01-02T12:00:00.000Z")
 
-    utc_now = datetime.datetime.utcnow()
+    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
     date_data = DateTimeValues(min_date=None, max_date=utc_now, duration=None)
 
     new_lower, new_upper = date_data.generate_new_bounds(lower_bound, upper_bound)
@@ -151,7 +151,7 @@ def test_generate_new_bounds_replace_interval():
     upper_bound = parse_date_optional_time("2020-01-03T12:00:00.000Z")
     lower_bound = parse_date_optional_time("2020-01-02T12:00:00.000Z")
 
-    utc_now = datetime.datetime.utcnow()
+    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
     date_data = DateTimeValues(min_date=None, max_date=utc_now, duration=datetime.timedelta(minutes=1))
 
     new_lower, new_upper = date_data.generate_new_bounds(lower_bound, upper_bound)
@@ -172,7 +172,7 @@ def test_generate_new_bounds_respects_min_and_max_date():
 
 
 def test_calendar_intervals():
-    utc_now = datetime.datetime.utcnow()
+    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
     date_data = DateTimeValues(None, utc_now, None)
     assert date_data.calendar_interval is None
 
@@ -187,7 +187,7 @@ def test_calendar_intervals():
 
 
 def test_fixed_intervals():
-    utc_now = datetime.datetime.utcnow()
+    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
     date_data = DateTimeValues(None, utc_now, None)
     assert date_data.calendar_interval is None
 
diff --git a/it/conftest.py b/it/conftest.py
index a34e4c534..14f048d81 100644
--- a/it/conftest.py
+++ b/it/conftest.py
@@ -24,3 +24,8 @@ def es_cluster_cleanup(es_cluster):
     es = Elasticsearch(f"http://localhost:{es_cluster.http_port}")
     es.indices.delete(index="*")
     es.indices.delete_data_stream(name="*")
+
+
+@pytest.fixture
+def es_release_build(es_cluster) -> bool:  # lets tests skip cases unsupported on release builds
+    return es_cluster.source_build_release
diff --git a/it/logs/test_logs_unmapped.py b/it/logs/test_logs_unmapped.py
index 2cf365db4..7c07a07ed 100644
--- a/it/logs/test_logs_unmapped.py
+++ b/it/logs/test_logs_unmapped.py
@@ -24,7 +24,9 @@
 
 @pytest.mark.track("elastic/logs")
 class TestLogsUnmapped:
-    def test_logs_chicken(self, es_cluster, rally):
+    def test_logs_chicken(self, es_cluster, rally, es_release_build):
+        if es_release_build:
+            pytest.skip("logging-insist-chicken is not supported on release builds")
         custom = {"mapping": "unmapped"}
         ret = rally.race(
             track="elastic/logs",
diff --git a/it/test_all_tracks_and_challenges.py b/it/test_all_tracks_and_challenges.py
index 3f4267404..9ec60c55a 100644
--- a/it/test_all_tracks_and_challenges.py
+++ b/it/test_all_tracks_and_challenges.py
@@ -36,9 +36,12 @@ class TestTrackRepository:
         "nyc_taxis": ["update-aggs-only"],
     }
     skip_challenges = {"esql": ["query-searchable-snapshot"]}
+    snapshot_only_challenges = {"wikipedia": ["esql-full-text-functions"]}
 
-    def test_autogenerated(self, es_cluster, rally, track, challenge, rally_options, es_cluster_cleanup):
+    def test_autogenerated(self, es_cluster, rally, track, challenge, rally_options, es_cluster_cleanup, es_release_build):
         track_params = {}
+        if es_release_build and challenge in self.snapshot_only_challenges.get(track, []):
+            pytest.skip(f"{track}-{challenge} is not supported on release builds")
         if track not in self.skip_tracks and challenge not in self.skip_challenges.get(track, []):
             if challenge in self.disable_assertions.get(track, []):
                 rally_options.update({"enable_assertions": False})
@@ -47,4 +50,4 @@ def test_autogenerated(self, es_cluster, rally, track, challenge, rally_options,
             ret = rally.race(track=track, challenge=challenge, track_params=track_params, **rally_options)
             assert ret == 0
         else:
-            pytest.skip(msg=f"{track}-{challenge} included in skip list")
+            pytest.skip(f"{track}-{challenge} included in skip list")
diff --git a/nyc_taxis/challenges/default.json b/nyc_taxis/challenges/default.json
index 7511e297c..638fa1feb 100644
--- a/nyc_taxis/challenges/default.json
+++ b/nyc_taxis/challenges/default.json
@@ -784,6 +784,10 @@
           "operation": "delete-index",
           "tags": ["setup"]
         },
+        {
+          "operation": "delete-nyc-taxis-sample-index",
+          "tags": ["setup"]
+        },
         {
           "operation": {
             "operation-type": "create-index",
@@ -926,6 +930,15 @@
           "operation": "refresh",
           "tags": ["setup"]
         },
+        {
+          "operation": "create-nyc-taxis-sample-index",
+          "tags": ["setup"]
+        },
+        {
+          "name": "refresh-after-sample-index",
+          "operation": "refresh",
+          "tags": ["setup"]
+        },
         {
           "operation": "avg_passenger_count_aggregation",
           "clients": 1,
@@ -941,24 +954,24 @@
         },
       {# non-serverless-inlinestats-marker-start #}{%- if build_flavor != "serverless" -%}
         {
-          "operation": "one_inlinestats_sum_esql",
+          "operation": "one_chained_inlinestats_esql",
           "clients": 1,
-          "warmup-iterations": 10,
-          "iterations": 50,
+          "warmup-iterations": 5,
+          "iterations": 20,
           "tags": ["inlinestats"]
         },
         {
-          "operation": "two_inlinestats_sum_esql",
+          "operation": "two_chained_inlinestats_esql",
           "clients": 1,
-          "warmup-iterations": 10,
-          "iterations": 50,
+          "warmup-iterations": 5,
+          "iterations": 20,
           "tags": ["inlinestats"]
         },
         {
-          "operation": "three_inlinestats_sum_esql",
+          "operation": "three_chained_inlinestats_esql",
           "clients": 1,
-          "warmup-iterations": 10,
-          "iterations": 50,
+          "warmup-iterations": 5,
+          "iterations": 20,
           "tags": ["inlinestats"]
         },
       {%- endif -%}{# non-serverless-inlinestats-marker-end #}
@@ -1360,45 +1373,45 @@
           "tags": ["inlinestats"]
         },
         {
-          "operation": "stats_count_comparison_esql",
+          "operation": "stats_count_group_by_esql",
           "clients": 1,
           "warmup-iterations": 10,
-          "iterations": 50,
+          "iterations": 20,
           "tags": ["inlinestats"]
         },
         {
-          "operation": "inlinestats_count_comparison_esql",
+          "operation": "inlinestats_count_group_by_esql",
           "clients": 1,
           "warmup-iterations": 10,
-          "iterations": 50,
+          "iterations": 20,
           "tags": ["inlinestats"]
         },
         {
-          "operation": "stats_avg_comparison_esql",
+          "operation": "stats_avg_group_by_esql",
           "clients": 1,
           "warmup-iterations": 10,
-          "iterations": 50,
+          "iterations": 20,
           "tags": ["inlinestats"]
         },
         {
-          "operation": "inlinestats_avg_comparison_esql",
+          "operation": "inlinestats_avg_group_by_esql",
           "clients": 1,
           "warmup-iterations": 10,
-          "iterations": 50,
+          "iterations": 20,
           "tags": ["inlinestats"]
         },
         {
-          "operation": "stats_max_comparison_esql",
+          "operation": "stats_max_group_by_esql",
           "clients": 1,
           "warmup-iterations": 10,
-          "iterations": 50,
+          "iterations": 20,
           "tags": ["inlinestats"]
         },
         {
-          "operation": "inlinestats_max_comparison_esql",
+          "operation": "inlinestats_max_group_by_esql",
           "clients": 1,
           "warmup-iterations": 10,
-          "iterations": 50,
+          "iterations": 20,
           "tags": ["inlinestats"]
         },
         {
diff --git a/nyc_taxis/operations/default.json b/nyc_taxis/operations/default.json
index b5a265593..ea13e0e58 100644
--- a/nyc_taxis/operations/default.json
+++ b/nyc_taxis/operations/default.json
@@ -956,6 +956,28 @@
         }
       }
     },
+    {
+      "name": "create-nyc-taxis-sample-index",
+      "operation-type": "raw-request",
+      "index": "nyc_taxis",
+      "method": "POST",
+      "path": "/_reindex",
+      "body": {
+        "source": {
+          "index": "nyc_taxis"
+        },
+        "max_docs": 1000,
+        "dest": {
+          "index": "nyc_taxis_sample"
+        }
+      }
+    },
+    {
+      "name": "delete-nyc-taxis-sample-index",
+      "operation-type": "delete-index",
+      "index": "nyc_taxis_sample",
+      "only-if-exists": true
+    },
     {
       "name": "avg_passenger_count_esql_segment_partitioning",
       "operation-type": "esql",
@@ -964,32 +986,32 @@
     {
       "name": "inlinestats_avg_esql_segment_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats avg(passenger_count)"
+      "query" : "FROM nyc_taxis | inline stats avg(passenger_count)"
     },
     {
       "name": "inlinestats_count_esql_segment_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats count(passenger_count)"
+      "query" : "FROM nyc_taxis | inline stats count(passenger_count)"
     },
     {
       "name": "inlinestats_median_esql_segment_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats median(passenger_count)"
+      "query" : "FROM nyc_taxis | inline stats median(passenger_count)"
     },
     {
       "name": "inlinestats_max_esql_segment_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats max(passenger_count)"
+      "query" : "FROM nyc_taxis | inline stats max(passenger_count)"
     },
     {
       "name": "inlinestats_sum_esql_segment_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats sum(passenger_count)"
+      "query" : "FROM nyc_taxis | inline stats sum(passenger_count)"
     },
     {
       "name": "inlinestats_top_esql_segment_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats top(passenger_count, 3, \"desc\")"
+      "query" : "FROM nyc_taxis | inline stats top(passenger_count, 3, \"desc\")"
     },
     {
       "name": "stats_count_esql_segment_partitioning",
@@ -1017,95 +1039,95 @@
       "query" : "FROM nyc_taxis | stats top(passenger_count, 3, \"desc\")"
     },
     {
-      "name": "stats_count_comparison_esql",
+      "name": "stats_count_group_by_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis METADATA _id | LIMIT 1000 | stats count(passenger_count) by _id"
+      "query" : "FROM nyc_taxis_sample METADATA _id | stats count(passenger_count) by _id"
     },
     {
-      "name": "inlinestats_count_comparison_esql",
+      "name": "inlinestats_count_group_by_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis METADATA _id | LIMIT 1000 | inlinestats count(passenger_count) by _id"
+      "query" : "FROM nyc_taxis_sample METADATA _id | inline stats count(passenger_count) by _id"
     },
     {
-      "name": "stats_avg_comparison_esql",
+      "name": "stats_avg_group_by_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis METADATA _id | LIMIT 1000 | stats avg(passenger_count) by _id"
+      "query" : "FROM nyc_taxis_sample METADATA _id | stats avg(passenger_count) by _id"
     },
     {
-      "name": "inlinestats_avg_comparison_esql",
+      "name": "inlinestats_avg_group_by_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis METADATA _id | LIMIT 1000 | inlinestats avg(passenger_count) by _id"
+      "query" : "FROM nyc_taxis_sample METADATA _id | inline stats avg(passenger_count) by _id"
     },
     {
-      "name": "stats_max_comparison_esql",
+      "name": "stats_max_group_by_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis METADATA _id | LIMIT 1000 | stats max(passenger_count) by _id"
+      "query" : "FROM nyc_taxis_sample METADATA _id | stats max(passenger_count) by _id"
     },
     {
-      "name": "inlinestats_max_comparison_esql",
+      "name": "inlinestats_max_group_by_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis METADATA _id | LIMIT 1000 | inlinestats max(passenger_count) by _id"
+      "query" : "FROM nyc_taxis_sample METADATA _id | inline stats max(passenger_count) by _id"
     },
     {
       "name": "inlinestats_then_stats_count_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats c = count(passenger_count) | stats count(c)"
+      "query" : "FROM nyc_taxis | inline stats c = count(passenger_count) | stats count(c)"
     },
     {
       "name": "stats_then_inlinestats_count_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | stats c = count(passenger_count) | inlinestats count(c)"
+      "query" : "FROM nyc_taxis | stats c = count(passenger_count) | inline stats count(c)"
     },
     {
       "name": "inlinestats_then_stats_sum_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats s = sum(passenger_count) | stats sum(s)"
+      "query" : "FROM nyc_taxis | inline stats s = sum(passenger_count) | stats sum(s)"
     },
     {
       "name": "stats_then_inlinestats_sum_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | stats s = sum(passenger_count) | inlinestats sum(s)"
+      "query" : "FROM nyc_taxis | stats s = sum(passenger_count) | inline stats sum(s)"
     },
     {
       "name": "inlinestats_then_stats_avg_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats a = avg(passenger_count) | stats avg(a)"
+      "query" : "FROM nyc_taxis | inline stats a = avg(passenger_count) | stats avg(a)"
     },
     {
       "name": "stats_then_inlinestats_avg_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | stats a = avg(passenger_count) | inlinestats avg(a)"
+      "query" : "FROM nyc_taxis | stats a = avg(passenger_count) | inline stats avg(a)"
     },
     {
-      "name": "one_inlinestats_sum_esql",
+      "name": "one_chained_inlinestats_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | LIMIT 1000 | inlinestats s1 = sum(passenger_count)"
+      "query" : "FROM nyc_taxis_sample | inline stats s1 = sum(passenger_count)"
     },
     {
-      "name": "two_inlinestats_sum_esql",
+      "name": "two_chained_inlinestats_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | LIMIT 1000 | inlinestats s1 = sum(passenger_count) | inlinestats s2 = sum(s1)"
+      "query" : "FROM nyc_taxis_sample | inline stats s1 = sum(passenger_count) | inline stats s2 = sum(trip_distance)"
     },
     {
-      "name": "three_inlinestats_sum_esql",
+      "name": "three_chained_inlinestats_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | LIMIT 1000 | inlinestats s1 = sum(passenger_count) | inlinestats s2 = sum(s1) | inlinestats s3 = sum(s2)"
+      "query" : "FROM nyc_taxis_sample | inline stats s1 = sum(passenger_count) | inline stats s2 = sum(trip_distance) | inline stats s3 = sum(total_amount)"
     },
     {
       "name": "multiple_stats_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | LIMIT 1000 | stats sum = sum(passenger_count), count = count(*), avg = avg(passenger_count)"
+      "query" : "FROM nyc_taxis_sample | stats sum = sum(passenger_count), count = count(*), avg = avg(passenger_count)"
     },
     {
       "name": "multiple_inlinestats_esql",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | LIMIT 1000 | inlinestats sum = sum(passenger_count), count = count(*), avg = avg(passenger_count)"
+      "query" : "FROM nyc_taxis_sample | inline stats sum = sum(passenger_count), count = count(*), avg = avg(passenger_count)"
     },
   {# non-serverless-doc-partitioning-marker-start #}{%- if build_flavor != "serverless" -%}
     {
       "name": "inlinestats_avg_esql_doc_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats avg(passenger_count)",
+      "query" : "FROM nyc_taxis | inline stats avg(passenger_count)",
       "body": {
         "accept_pragma_risks": true,
         "pragma": { "data_partitioning": "doc" }
@@ -1114,7 +1136,7 @@
     {
       "name": "inlinestats_count_esql_doc_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats count(passenger_count)",
+      "query" : "FROM nyc_taxis | inline stats count(passenger_count)",
       "body": {
         "accept_pragma_risks": true,
         "pragma": { "data_partitioning": "doc" }
@@ -1123,7 +1145,7 @@
     {
       "name": "inlinestats_median_esql_doc_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats median(passenger_count)",
+      "query" : "FROM nyc_taxis | inline stats median(passenger_count)",
       "body": {
         "accept_pragma_risks": true,
         "pragma": { "data_partitioning": "doc" }
@@ -1132,7 +1154,7 @@
     {
       "name": "inlinestats_max_esql_doc_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats max(passenger_count)",
+      "query" : "FROM nyc_taxis | inline stats max(passenger_count)",
       "body": {
         "accept_pragma_risks": true,
         "pragma": { "data_partitioning": "doc" }
@@ -1141,7 +1163,7 @@
     {
       "name": "inlinestats_sum_esql_doc_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats sum(passenger_count)",
+      "query" : "FROM nyc_taxis | inline stats sum(passenger_count)",
       "body": {
         "accept_pragma_risks": true,
         "pragma": { "data_partitioning": "doc" }
@@ -1150,7 +1172,7 @@
     {
       "name": "inlinestats_top_esql_doc_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | inlinestats top(passenger_count, 3, \"desc\")",
+      "query" : "FROM nyc_taxis | inline stats top(passenger_count, 3, \"desc\")",
       "body": {
         "accept_pragma_risks": true,
         "pragma": { "data_partitioning": "doc" }
@@ -1415,13 +1437,13 @@
     {
       "name": "inlinestats_avg_passenger_count_filtered_esql_segment_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | where total_amount > 60 and rate_code_id==\"2\"| inlinestats avg(passenger_count)"
+      "query" : "FROM nyc_taxis | where total_amount > 60 and rate_code_id==\"2\"| inline stats avg(passenger_count)"
     },
   {# non-serverless-doc-partitioning-marker-start #}{%- if build_flavor != "serverless" -%}
     {
       "name": "inlinestats_avg_passenger_count_filtered_esql_doc_partitioning",
       "operation-type": "esql",
-      "query" : "FROM nyc_taxis | where total_amount > 60 and rate_code_id==\"2\"| inlinestats avg(passenger_count)",
+      "query" : "FROM nyc_taxis | where total_amount > 60 and rate_code_id==\"2\"| inline stats avg(passenger_count)",
       "body": {
         "accept_pragma_risks": true,
         "pragma": { "data_partitioning": "doc" }
diff --git a/pyproject.toml b/pyproject.toml
index c6cb117f8..d92664fc1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,13 +34,22 @@ exclude = [
 
 [tool.hatch.envs.default]
 dependencies = [
-    "esrally[develop] @ git+https://github.com/elastic/rally.git@master",
-    "pytest-rally @ git+https://github.com/elastic/pytest-rally.git@main",
+    "esrally[develop] @ git+https://github.com/elastic/rally.git@master"
+]
+
+[tool.hatch.envs.it]
+extra-dependencies = [
+    "pytest-rally @ git+https://github.com/elastic/pytest-rally.git@main"
+]
+
+[tool.hatch.envs.it_serverless]
+extra-dependencies = [
+    "pytest-rally @ git+https://github.com/elastic/pytest-rally.git@main"
 ]
 
 [tool.hatch.envs.unit]
 extra-dependencies = [
-    "geneve==0.0.3",
+    "geneve==0.3.0",
     "elastic-package-assets @ git+https://github.com/elastic/package-assets.git@main"
 ]