From b83b02df1006e32156b4207574ab95966948f6f9 Mon Sep 17 00:00:00 2001
From: John Mazanec
Date: Fri, 28 Jan 2022 09:03:10 -0800
Subject: [PATCH 1/2] [Benchmark] Remove ingest results collection

Removes collecting ingest results from the benchmarking tool. On big data
sets, this will prevent the process from going out of memory.

Signed-off-by: John Mazanec
---
 benchmarks/perf-tool/okpt/test/steps/steps.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/benchmarks/perf-tool/okpt/test/steps/steps.py b/benchmarks/perf-tool/okpt/test/steps/steps.py
index 9e70123c3d..9b2f212d9e 100644
--- a/benchmarks/perf-tool/okpt/test/steps/steps.py
+++ b/benchmarks/perf-tool/okpt/test/steps/steps.py
@@ -287,14 +287,12 @@ def _action(self):
         def action(doc_id):
             return {'index': {'_index': self.index_name, '_id': doc_id}}
 
-        index_responses = []
         for i in range(0, self.doc_count, self.bulk_size):
             partition = self.dataset.read(self.bulk_size)
             if partition is None:
                 break
             body = bulk_transform(partition, self.field_name, action, i)
-            result = bulk_index(self.opensearch, self.index_name, body)
-            index_responses.append(result)
+            bulk_index(self.opensearch, self.index_name, body)
 
         self.dataset.reset()
 

From 788f09bbdfadee84566522b149c90f1da43bb536 Mon Sep 17 00:00:00 2001
From: John Mazanec
Date: Fri, 28 Jan 2022 09:47:49 -0800
Subject: [PATCH 2/2] Add descriptive comment

Signed-off-by: John Mazanec
---
 benchmarks/perf-tool/okpt/test/steps/steps.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/benchmarks/perf-tool/okpt/test/steps/steps.py b/benchmarks/perf-tool/okpt/test/steps/steps.py
index 9b2f212d9e..7230bc1f4f 100644
--- a/benchmarks/perf-tool/okpt/test/steps/steps.py
+++ b/benchmarks/perf-tool/okpt/test/steps/steps.py
@@ -287,6 +287,8 @@ def _action(self):
         def action(doc_id):
             return {'index': {'_index': self.index_name, '_id': doc_id}}
 
+        # Maintain minimal state outside of this loop. For large data sets, too
+        # much state may cause out of memory failure
         for i in range(0, self.doc_count, self.bulk_size):
             partition = self.dataset.read(self.bulk_size)
             if partition is None: