From b83b02df1006e32156b4207574ab95966948f6f9 Mon Sep 17 00:00:00 2001
From: John Mazanec
Date: Fri, 28 Jan 2022 09:03:10 -0800
Subject: [PATCH 1/2] [Benchmark] Remove ingest results collection

Removes collecting ingest results from the benchmarking tool. On big data
sets, this will prevent the process from going out of memory.

Signed-off-by: John Mazanec
---
 benchmarks/perf-tool/okpt/test/steps/steps.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/benchmarks/perf-tool/okpt/test/steps/steps.py b/benchmarks/perf-tool/okpt/test/steps/steps.py
index 9e70123c3d..9b2f212d9e 100644
--- a/benchmarks/perf-tool/okpt/test/steps/steps.py
+++ b/benchmarks/perf-tool/okpt/test/steps/steps.py
@@ -287,14 +287,12 @@ def _action(self):
         def action(doc_id):
             return {'index': {'_index': self.index_name, '_id': doc_id}}
 
-        index_responses = []
         for i in range(0, self.doc_count, self.bulk_size):
             partition = self.dataset.read(self.bulk_size)
             if partition is None:
                 break
             body = bulk_transform(partition, self.field_name, action, i)
-            result = bulk_index(self.opensearch, self.index_name, body)
-            index_responses.append(result)
+            bulk_index(self.opensearch, self.index_name, body)
 
         self.dataset.reset()
 

From 788f09bbdfadee84566522b149c90f1da43bb536 Mon Sep 17 00:00:00 2001
From: John Mazanec
Date: Fri, 28 Jan 2022 09:47:49 -0800
Subject: [PATCH 2/2] Add descriptive comment

Signed-off-by: John Mazanec
---
 benchmarks/perf-tool/okpt/test/steps/steps.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/benchmarks/perf-tool/okpt/test/steps/steps.py b/benchmarks/perf-tool/okpt/test/steps/steps.py
index 9b2f212d9e..7230bc1f4f 100644
--- a/benchmarks/perf-tool/okpt/test/steps/steps.py
+++ b/benchmarks/perf-tool/okpt/test/steps/steps.py
@@ -287,6 +287,8 @@ def _action(self):
         def action(doc_id):
             return {'index': {'_index': self.index_name, '_id': doc_id}}
 
+        # Maintain minimal state outside of this loop. For large data sets, too
+        # much state may cause out of memory failure
         for i in range(0, self.doc_count, self.bulk_size):
             partition = self.dataset.read(self.bulk_size)
             if partition is None: