From 59a512dcd8cceca12efd55816aa957aa31fd1fda Mon Sep 17 00:00:00 2001 From: Nimrod Shlagman Date: Tue, 24 Jan 2023 11:30:34 +0200 Subject: [PATCH 1/8] add elasticsearch sanitization --- CHANGELOG.md | 2 + .../instrumentation/elasticsearch/__init__.py | 18 +++- .../tests/test_elasticsearch.py | 87 ++++++++++++++----- 3 files changed, 81 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 068a067dd1..5de776aa6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Add default query sanitization for elasticsearch db.statement attribute + ([#1545](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1545)) - Add metric instrumentation for urllib ([#1553](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1553)) - `opentelemetry/sdk/extension/aws` Implement [`aws.ecs.*`](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/cloud_provider/aws/ecs.md) and [`aws.logs.*`](https://opentelemetry.io/docs/reference/specification/resource/semantic_conventions/cloud_provider/aws/logs/) resource attributes in the `AwsEcsResourceDetector` detector when the ECS Metadata v4 is available diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py index e2be4a3047..554e509cbc 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py @@ -135,11 +135,16 @@ def _instrument(self, **kwargs): tracer = get_tracer(__name__, __version__, tracer_provider) request_hook = kwargs.get("request_hook") response_hook = kwargs.get("response_hook") + sanitize_query = kwargs.get("sanitize_query", True) _wrap( elasticsearch, "Transport.perform_request", _wrap_perform_request( - tracer, self._span_name_prefix, request_hook, response_hook + tracer, + sanitize_query, + self._span_name_prefix, + request_hook, + response_hook, ), ) @@ -154,7 +159,11 @@ def _uninstrument(self, **kwargs): def _wrap_perform_request( - tracer, span_name_prefix, request_hook=None, response_hook=None + tracer, + sanitize_query, + span_name_prefix, + request_hook=None, + response_hook=None, ): # pylint: disable=R0912,R0914 def wrapper(wrapped, _, args, kwargs): @@ -214,7 +223,10 @@ def wrapper(wrapped, _, args, kwargs): if method: attributes["elasticsearch.method"] = method if body: - attributes[SpanAttributes.DB_STATEMENT] = str(body) + statement = str(body) + if sanitize_query: + statement = "sanitized" + attributes[SpanAttributes.DB_STATEMENT] = statement if params: attributes["elasticsearch.params"] = str(params) if doc_id: diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index 1a2cf30738..c319935569 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -42,7 +42,6 @@ else: from . import helpers_es2 as helpers # pylint: disable=no-name-in-module - Article = helpers.Article @@ -50,10 +49,26 @@ "elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request" ) class TestElasticsearchIntegration(TestBase): + search_attributes = { + SpanAttributes.DB_SYSTEM: "elasticsearch", + "elasticsearch.url": "/test-index/_search", + "elasticsearch.method": helpers.dsl_search_method, + "elasticsearch.target": "test-index", + SpanAttributes.DB_STATEMENT: str( + {"query": {"bool": {"filter": [{"term": {"author": "testing"}}]}}} + ), + } + + create_attributes = { + SpanAttributes.DB_SYSTEM: "elasticsearch", + "elasticsearch.url": "/test-index", + "elasticsearch.method": "HEAD", + } + def setUp(self): super().setUp() self.tracer = self.tracer_provider.get_tracer(__name__) - ElasticsearchInstrumentor().instrument() + ElasticsearchInstrumentor().instrument(sanitize_query=False) def tearDown(self): super().tearDown() @@ -241,21 +256,34 @@ def test_dsl_search(self, request_mock): self.assertIsNotNone(span.end_time) self.assertEqual( span.attributes, - { - SpanAttributes.DB_SYSTEM: "elasticsearch", - "elasticsearch.url": "/test-index/_search", - "elasticsearch.method": helpers.dsl_search_method, - "elasticsearch.target": "test-index", - SpanAttributes.DB_STATEMENT: str( - { - "query": { - "bool": { - "filter": [{"term": {"author": "testing"}}] - } - } - } - ), - }, + self.search_attributes, + ) + + def test_dsl_search_sanitized(self, request_mock): + # Reset instrumentation to use sanitized query (default) + ElasticsearchInstrumentor().uninstrument() + ElasticsearchInstrumentor().instrument() + + # update expected attributes to match sanitized query + sanitized_search_attributes = self.search_attributes.copy() + sanitized_search_attributes.update( + {SpanAttributes.DB_STATEMENT: "sanitized"} + ) + + request_mock.return_value = (1, {}, '{"hits": {"hits": []}}') + client = Elasticsearch() + search = Search(using=client, index="test-index").filter( + "term", author="testing" + ) + search.execute() + spans = self.get_finished_spans() + span = spans[0] + self.assertEqual(1, len(spans)) + self.assertEqual(span.name, "Elasticsearch//_search") + self.assertIsNotNone(span.end_time) + self.assertEqual( + span.attributes, + sanitized_search_attributes, ) def test_dsl_create(self, request_mock): @@ -270,11 +298,7 @@ def test_dsl_create(self, request_mock): self.assertEqual( span1.attributes, - { - SpanAttributes.DB_SYSTEM: "elasticsearch", - "elasticsearch.url": "/test-index", - "elasticsearch.method": "HEAD", - }, + self.create_attributes, ) attributes = { @@ -288,6 +312,24 @@ def test_dsl_create(self, request_mock): helpers.dsl_create_statement, ) + def test_dsl_create_sanitized(self, request_mock): + # Reset instrumentation to explicitly use sanitized query + ElasticsearchInstrumentor().uninstrument() + ElasticsearchInstrumentor().instrument(sanitize_query=True) + + request_mock.return_value = (1, {}, {}) + client = Elasticsearch() + Article.init(using=client) + + spans = self.get_finished_spans() + self.assertEqual(2, len(spans)) + span = spans.by_attr(key="elasticsearch.method", value="HEAD") + + self.assertEqual( + span.attributes, + self.create_attributes, + ) + def test_dsl_index(self, request_mock): request_mock.return_value = helpers.dsl_index_result @@ -323,7 +365,6 @@ def test_request_hook(self, request_mock): request_hook_kwargs_attribute = "request_hook.kwargs" def request_hook(span, method, url, kwargs): - attributes = { request_hook_method_attribute: method, request_hook_url_attribute: url, From b7732691c31e43bdc012572bdfca796f87c4b0d2 Mon Sep 17 00:00:00 2001 From: Nimrod Shlagman Date: Wed, 1 Feb 2023 14:57:03 +0200 Subject: [PATCH 2/8] add specific query sanitization --- .../instrumentation/elasticsearch/__init__.py | 4 +- .../instrumentation/elasticsearch/utils.py | 58 ++++++++++++++++++ .../tests/sanitization_queries.py | 61 +++++++++++++++++++ .../tests/test_elasticsearch.py | 40 +++++++----- 4 files changed, 146 insertions(+), 17 deletions(-) create mode 100644 instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py create mode 100644 instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py index 554e509cbc..5bae1eda23 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py @@ -96,6 +96,8 @@ def response_hook(span, response): from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace import SpanKind, get_tracer +from .utils import sanitize_body + logger = getLogger(__name__) @@ -225,7 +227,7 @@ def wrapper(wrapped, _, args, kwargs): if body: statement = str(body) if sanitize_query: - statement = "sanitized" + statement = sanitize_body(body) attributes[SpanAttributes.DB_STATEMENT] = statement if params: attributes["elasticsearch.params"] = str(params) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py new file mode 100644 index 0000000000..8c94b5c241 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py @@ -0,0 +1,58 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +sanitized_keys = ( + "message", + "should", + "filter", + "query", + "queries", + "intervals", + "match", +) +sanitized_value = "?" + + +def _flatten_dict(d, parent_key=""): + items = [] + for k, v in d.items(): + new_key = parent_key + "." + k if parent_key else k + if isinstance(v, dict): + items.extend(_flatten_dict(v, new_key).items()) + else: + items.append((new_key, v)) + return dict(items) + + +def _unflatten_dict(d): + res = {} + for k, v in d.items(): + keys = k.split(".") + d = res + for key in keys[:-1]: + if key not in d: + d[key] = {} + d = d[key] + d[keys[-1]] = v + return res + + +def sanitize_body(body) -> str: + flatten_body = _flatten_dict(body) + + for key in flatten_body.keys(): + if key.endswith(sanitized_keys): + flatten_body[key] = sanitized_value + + return str(_unflatten_dict(flatten_body)) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py new file mode 100644 index 0000000000..d7746f52a8 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py @@ -0,0 +1,61 @@ +interval_query = { + "query": { + "intervals": { + "my_text": { + "all_of": { + "ordered": True, + "intervals": [ + { + "match": { + "query": "my favorite food", + "max_gaps": 0, + "ordered": True, + } + }, + { + "any_of": { + "intervals": [ + {"match": {"query": "hot water"}}, + {"match": {"query": "cold porridge"}}, + ] + } + }, + ], + } + } + } + } +} + +match_query = {"query": {"match": {"message": {"query": "this is a test"}}}} + +filter_query = { + "query": { + "bool": { + "must": [ + {"match": {"title": "Search"}}, + {"match": {"content": "Elasticsearch"}}, + ], + "filter": [ + {"term": {"status": "published"}}, + {"range": {"publish_date": {"gte": "2015-01-01"}}}, + ], + } + } +} + +interval_query_sanitized = { + "query": {"intervals": {"my_text": {"all_of": {"ordered": True, "intervals": "?"}}}} +} +match_query_sanitized = {"query": {"match": {"message": {"query": "?"}}}} +filter_query_sanitized = { + "query": { + "bool": { + "must": [ + {"match": {"title": "Search"}}, + {"match": {"content": "Elasticsearch"}}, + ], + "filter": "?", + } + } +} diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index c319935569..c5c61d1317 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -24,15 +24,16 @@ import opentelemetry.instrumentation.elasticsearch from opentelemetry import trace -from opentelemetry.instrumentation.elasticsearch import ( - ElasticsearchInstrumentor, -) +from opentelemetry.instrumentation.elasticsearch import ElasticsearchInstrumentor +from opentelemetry.instrumentation.elasticsearch.utils import sanitize_body from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.test.test_base import TestBase from opentelemetry.trace import StatusCode major_version = elasticsearch.VERSION[0] +from . import sanitization_queries as queries + if major_version == 7: from . import helpers_es7 as helpers # pylint: disable=no-name-in-module elif major_version == 6: @@ -155,9 +156,7 @@ def test_result_values(self, request_mock): self.assertEqual(1, len(spans)) self.assertEqual(spans[0].name, "Elasticsearch/test-index/_doc/:id") self.assertEqual("False", spans[0].attributes["elasticsearch.found"]) - self.assertEqual( - "True", spans[0].attributes["elasticsearch.timed_out"] - ) + self.assertEqual("True", spans[0].attributes["elasticsearch.timed_out"]) self.assertEqual("7", spans[0].attributes["elasticsearch.took"]) def test_trace_error_unknown(self, request_mock): @@ -184,9 +183,7 @@ def _test_trace_error(self, code, exc): span = spans[0] self.assertFalse(span.status.is_ok) self.assertEqual(span.status.status_code, code) - self.assertEqual( - span.status.description, f"{type(exc).__name__}: {exc}" - ) + self.assertEqual(span.status.description, f"{type(exc).__name__}: {exc}") def test_parent(self, request_mock): request_mock.return_value = (1, {}, {}) @@ -267,7 +264,7 @@ def test_dsl_search_sanitized(self, request_mock): # update expected attributes to match sanitized query sanitized_search_attributes = self.search_attributes.copy() sanitized_search_attributes.update( - {SpanAttributes.DB_STATEMENT: "sanitized"} + {SpanAttributes.DB_STATEMENT: "{'query': {'bool': {'filter': '?'}}}"} ) request_mock.return_value = (1, {}, '{"hits": {"hits": []}}') @@ -281,6 +278,7 @@ def test_dsl_search_sanitized(self, request_mock): self.assertEqual(1, len(spans)) self.assertEqual(span.name, "Elasticsearch//_search") self.assertIsNotNone(span.end_time) + print("~~~~~~~~~~", span.attributes) self.assertEqual( span.attributes, sanitized_search_attributes, @@ -292,6 +290,7 @@ def test_dsl_create(self, request_mock): Article.init(using=client) spans = self.get_finished_spans() + assert spans self.assertEqual(2, len(spans)) span1 = spans.by_attr(key="elasticsearch.method", value="HEAD") span2 = spans.by_attr(key="elasticsearch.method", value="PUT") @@ -322,6 +321,8 @@ def test_dsl_create_sanitized(self, request_mock): Article.init(using=client) spans = self.get_finished_spans() + assert spans + self.assertEqual(2, len(spans)) span = spans.by_attr(key="elasticsearch.method", value="HEAD") @@ -391,9 +392,7 @@ def request_hook(span, method, url, kwargs): spans = self.get_finished_spans() self.assertEqual(1, len(spans)) - self.assertEqual( - "GET", spans[0].attributes[request_hook_method_attribute] - ) + self.assertEqual("GET", spans[0].attributes[request_hook_method_attribute]) self.assertEqual( f"/{index}/_doc/{doc_id}", spans[0].attributes[request_hook_url_attribute], @@ -408,9 +407,7 @@ def test_response_hook(self, request_mock): def response_hook(span, response): if span and span.is_recording(): - span.set_attribute( - response_attribute_name, json.dumps(response) - ) + span.set_attribute(response_attribute_name, json.dumps(response)) ElasticsearchInstrumentor().uninstrument() ElasticsearchInstrumentor().instrument(response_hook=response_hook) @@ -454,3 +451,14 @@ def response_hook(span, response): json.dumps(response_payload), spans[0].attributes[response_attribute_name], ) + + def test_body_sanitization(self, _): + self.assertEqual( + sanitize_body(queries.interval_query), str(queries.interval_query_sanitized) + ) + self.assertEqual( + sanitize_body(queries.match_query), str(queries.match_query_sanitized) + ) + self.assertEqual( + sanitize_body(queries.filter_query), str(queries.filter_query_sanitized) + ) From 0c19af8b07bd018250f9a18e1ed6c3522632b9c1 Mon Sep 17 00:00:00 2001 From: Nimrod Shlagman Date: Wed, 1 Feb 2023 15:04:51 +0200 Subject: [PATCH 3/8] tox generate fix --- .../instrumentation/auto_instrumentation/__init__.py | 4 ---- .../src/opentelemetry/instrumentation/instrumentor.py | 1 - 2 files changed, 5 deletions(-) diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/auto_instrumentation/__init__.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/auto_instrumentation/__init__.py index 7d2ca83294..5758ef1834 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/auto_instrumentation/__init__.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/auto_instrumentation/__init__.py @@ -29,7 +29,6 @@ def run() -> None: - parser = ArgumentParser( description=""" opentelemetry-instrument automatically instruments a Python @@ -57,9 +56,7 @@ def run() -> None: environment_variable_module = entry_point.load() for attribute in dir(environment_variable_module): - if attribute.startswith("OTEL_"): - argument = sub(r"OTEL_(PYTHON_)?", "", attribute).lower() parser.add_argument( @@ -88,7 +85,6 @@ def run() -> None: ).items(): value = getattr(args, argument) if value is not None: - environ[otel_environment_variable] = value python_path = environ.get("PYTHONPATH") diff --git a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/instrumentor.py b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/instrumentor.py index 060ac484e7..160fe3c450 100644 --- a/opentelemetry-instrumentation/src/opentelemetry/instrumentation/instrumentor.py +++ b/opentelemetry-instrumentation/src/opentelemetry/instrumentation/instrumentor.py @@ -46,7 +46,6 @@ class BaseInstrumentor(ABC): _is_instrumented_by_opentelemetry = False def __new__(cls, *args, **kwargs): - if cls._instance is None: cls._instance = object.__new__(cls, *args, **kwargs) From 44c0bb2c072c85761e39006ff71367c18e51fb8d Mon Sep 17 00:00:00 2001 From: Nimrod Shlagman Date: Sun, 5 Feb 2023 16:23:44 +0200 Subject: [PATCH 4/8] lint fixes --- CHANGELOG.md | 4 +- .../instrumentation/elasticsearch/__init__.py | 3 +- .../instrumentation/elasticsearch/utils.py | 1 + .../tests/sanitization_queries.py | 6 ++- .../tests/test_elasticsearch.py | 43 +++++++++++++------ 5 files changed, 39 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 788e4344d6..8be75b9754 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Add default query sanitization for elasticsearch db.statement attribute - ([#1545](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1545)) +- Add optional query sanitization for elasticsearch db.statement attribute + ([#1598](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1598)) - `opentelemetry-instrumentation-redis` Add `sanitize_query` config option to allow query sanitization. ([#1572](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1572)) - `opentelemetry-instrumentation-celery` Record exceptions as events on the span. diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py index 0aa870073a..4fd6bc79e1 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/__init__.py @@ -44,6 +44,7 @@ The instrument() method accepts the following keyword args: tracer_provider (TracerProvider) - an optional tracer provider +sanitize_query (bool) - an optional query sanitization flag request_hook (Callable) - a function with extra user-defined logic to be performed before performing the request this function signature is: def request_hook(span: Span, method: str, url: str, kwargs) @@ -137,7 +138,7 @@ def _instrument(self, **kwargs): tracer = get_tracer(__name__, __version__, tracer_provider) request_hook = kwargs.get("request_hook") response_hook = kwargs.get("response_hook") - sanitize_query = kwargs.get("sanitize_query", True) + sanitize_query = kwargs.get("sanitize_query", False) _wrap( elasticsearch, "Transport.perform_request", diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py index 8c94b5c241..6f7b6f7bc7 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py @@ -24,6 +24,7 @@ sanitized_value = "?" +# pylint: disable=C0103 def _flatten_dict(d, parent_key=""): items = [] for k, v in d.items(): diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py index d7746f52a8..234e24433e 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/sanitization_queries.py @@ -45,7 +45,11 @@ } interval_query_sanitized = { - "query": {"intervals": {"my_text": {"all_of": {"ordered": True, "intervals": "?"}}}} + "query": { + "intervals": { + "my_text": {"all_of": {"ordered": True, "intervals": "?"}} + } + } } match_query_sanitized = {"query": {"match": {"message": {"query": "?"}}}} filter_query_sanitized = { diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index c5c61d1317..b5efbb38a2 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import json import os import threading @@ -24,16 +25,18 @@ import opentelemetry.instrumentation.elasticsearch from opentelemetry import trace -from opentelemetry.instrumentation.elasticsearch import ElasticsearchInstrumentor +from opentelemetry.instrumentation.elasticsearch import ( + ElasticsearchInstrumentor, +) from opentelemetry.instrumentation.elasticsearch.utils import sanitize_body from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.test.test_base import TestBase from opentelemetry.trace import StatusCode -major_version = elasticsearch.VERSION[0] - from . import sanitization_queries as queries +major_version = elasticsearch.VERSION[0] + if major_version == 7: from . import helpers_es7 as helpers # pylint: disable=no-name-in-module elif major_version == 6: @@ -69,7 +72,7 @@ class TestElasticsearchIntegration(TestBase): def setUp(self): super().setUp() self.tracer = self.tracer_provider.get_tracer(__name__) - ElasticsearchInstrumentor().instrument(sanitize_query=False) + ElasticsearchInstrumentor().instrument() def tearDown(self): super().tearDown() @@ -156,7 +159,9 @@ def test_result_values(self, request_mock): self.assertEqual(1, len(spans)) self.assertEqual(spans[0].name, "Elasticsearch/test-index/_doc/:id") self.assertEqual("False", spans[0].attributes["elasticsearch.found"]) - self.assertEqual("True", spans[0].attributes["elasticsearch.timed_out"]) + self.assertEqual( + "True", spans[0].attributes["elasticsearch.timed_out"] + ) self.assertEqual("7", spans[0].attributes["elasticsearch.took"]) def test_trace_error_unknown(self, request_mock): @@ -183,7 +188,9 @@ def _test_trace_error(self, code, exc): span = spans[0] self.assertFalse(span.status.is_ok) self.assertEqual(span.status.status_code, code) - self.assertEqual(span.status.description, f"{type(exc).__name__}: {exc}") + self.assertEqual( + span.status.description, f"{type(exc).__name__}: {exc}" + ) def test_parent(self, request_mock): request_mock.return_value = (1, {}, {}) @@ -259,12 +266,14 @@ def test_dsl_search(self, request_mock): def test_dsl_search_sanitized(self, request_mock): # Reset instrumentation to use sanitized query (default) ElasticsearchInstrumentor().uninstrument() - ElasticsearchInstrumentor().instrument() + ElasticsearchInstrumentor().instrument(sanitize_query=True) # update expected attributes to match sanitized query sanitized_search_attributes = self.search_attributes.copy() sanitized_search_attributes.update( - {SpanAttributes.DB_STATEMENT: "{'query': {'bool': {'filter': '?'}}}"} + { + SpanAttributes.DB_STATEMENT: "{'query': {'bool': {'filter': '?'}}}" + } ) request_mock.return_value = (1, {}, '{"hits": {"hits": []}}') @@ -278,7 +287,6 @@ def test_dsl_search_sanitized(self, request_mock): self.assertEqual(1, len(spans)) self.assertEqual(span.name, "Elasticsearch//_search") self.assertIsNotNone(span.end_time) - print("~~~~~~~~~~", span.attributes) self.assertEqual( span.attributes, sanitized_search_attributes, @@ -392,7 +400,9 @@ def request_hook(span, method, url, kwargs): spans = self.get_finished_spans() self.assertEqual(1, len(spans)) - self.assertEqual("GET", spans[0].attributes[request_hook_method_attribute]) + self.assertEqual( + "GET", spans[0].attributes[request_hook_method_attribute] + ) self.assertEqual( f"/{index}/_doc/{doc_id}", spans[0].attributes[request_hook_url_attribute], @@ -407,7 +417,9 @@ def test_response_hook(self, request_mock): def response_hook(span, response): if span and span.is_recording(): - span.set_attribute(response_attribute_name, json.dumps(response)) + span.set_attribute( + response_attribute_name, json.dumps(response) + ) ElasticsearchInstrumentor().uninstrument() ElasticsearchInstrumentor().instrument(response_hook=response_hook) @@ -454,11 +466,14 @@ def response_hook(span, response): def test_body_sanitization(self, _): self.assertEqual( - sanitize_body(queries.interval_query), str(queries.interval_query_sanitized) + sanitize_body(queries.interval_query), + str(queries.interval_query_sanitized), ) self.assertEqual( - sanitize_body(queries.match_query), str(queries.match_query_sanitized) + sanitize_body(queries.match_query), + str(queries.match_query_sanitized), ) self.assertEqual( - sanitize_body(queries.filter_query), str(queries.filter_query_sanitized) + sanitize_body(queries.filter_query), + str(queries.filter_query_sanitized), ) From d84bf7eb5ad504c780ea8f5a9829b3c62b499c99 Mon Sep 17 00:00:00 2001 From: Nimrod Shlagman Date: Sun, 5 Feb 2023 17:27:37 +0200 Subject: [PATCH 5/8] clean CHANGELOG.md --- CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8be75b9754..a6c127765a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Add optional query sanitization for elasticsearch db.statement attribute +- `opentelemetry-instrumentation-elasticsearch` Add optional db.statement query sanitization. ([#1598](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1598)) -- `opentelemetry-instrumentation-redis` Add `sanitize_query` config option to allow query sanitization. - ([#1572](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1572)) - `opentelemetry-instrumentation-celery` Record exceptions as events on the span. ([#1573](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1573)) - Add metric instrumentation for urllib From 5df30347f5232ac203cdd7f1096d362bd1ae6f8b Mon Sep 17 00:00:00 2001 From: Nimrod Shlagman Date: Sun, 5 Feb 2023 17:33:20 +0200 Subject: [PATCH 6/8] clean CHANGELOG.md2 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6c127765a..54bf5a9a80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `opentelemetry-instrumentation-redis` Add `sanitize_query` config option to allow query sanitization. ([#1572](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1572)) - `opentelemetry-instrumentation-elasticsearch` Add optional db.statement query sanitization. ([#1598](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1598)) - `opentelemetry-instrumentation-celery` Record exceptions as events on the span. From d389280ba6c0186d06b5f4d08465ed5301f55c7a Mon Sep 17 00:00:00 2001 From: Nimrod Shlagman Date: Mon, 6 Feb 2023 13:00:19 +0200 Subject: [PATCH 7/8] lint fix --- .../tests/test_elasticsearch.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index b5efbb38a2..f7168b39d2 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -33,7 +33,7 @@ from opentelemetry.test.test_base import TestBase from opentelemetry.trace import StatusCode -from . import sanitization_queries as queries +from . import sanitization_queries # pylint: disable=no-name-in-module major_version = elasticsearch.VERSION[0] @@ -323,7 +323,6 @@ def test_dsl_create_sanitized(self, request_mock): # Reset instrumentation to explicitly use sanitized query ElasticsearchInstrumentor().uninstrument() ElasticsearchInstrumentor().instrument(sanitize_query=True) - request_mock.return_value = (1, {}, {}) client = Elasticsearch() Article.init(using=client) @@ -466,14 +465,14 @@ def response_hook(span, response): def test_body_sanitization(self, _): self.assertEqual( - sanitize_body(queries.interval_query), - str(queries.interval_query_sanitized), + sanitize_body(sanitization_queries.interval_query), + str(sanitization_queries.interval_query_sanitized), ) self.assertEqual( - sanitize_body(queries.match_query), - str(queries.match_query_sanitized), + sanitize_body(sanitization_queries.match_query), + str(sanitization_queries.match_query_sanitized), ) self.assertEqual( - sanitize_body(queries.filter_query), - str(queries.filter_query_sanitized), + sanitize_body(sanitization_queries.filter_query), + str(sanitization_queries.filter_query_sanitized), ) From b370f4342ca3c685daffbd1a3f3d54be9e8981dd Mon Sep 17 00:00:00 2001 From: Nimrod Shlagman Date: Mon, 6 Feb 2023 13:21:41 +0200 Subject: [PATCH 8/8] lint fix2 --- .../src/opentelemetry/instrumentation/elasticsearch/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py index 6f7b6f7bc7..7c5d753b31 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py @@ -52,7 +52,7 @@ def _unflatten_dict(d): def sanitize_body(body) -> str: flatten_body = _flatten_dict(body) - for key in flatten_body.keys(): + for key in flatten_body: if key.endswith(sanitized_keys): flatten_body[key] = sanitized_value