diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java index f98b51d1c03..d1eb26cabbe 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java @@ -45,6 +45,36 @@ public void coalesce_nonexistent_field_fallback() throws IOException { timing(summary, "coalesce_nonexistent_field_fallback", ppl); } + /** + * Tests regex-based field extraction and transformation using rex command. Validates that the + * Calcite plan correctly handles regex patterns. + */ + @Test + public void rex_regex_transformation() throws IOException { + String ppl = sanitize(loadExpectedQuery("rex_regex_transformation.ppl")); + timing(summary, "rex_regex_transformation", ppl); + } + + /** + * Tests LIKE pattern matching with aggregation using script engine. Validates filtering by + * message content and grouping results. + */ + @Test + public void script_engine_like_pattern_with_aggregation() throws IOException { + String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_aggregation.ppl")); + timing(summary, "script_engine_like_pattern_with_aggregation", ppl); + } + + /** + * Tests LIKE pattern matching with sorting and result limiting. Validates filtering by message + * content with timestamp ordering. + */ + @Test + public void script_engine_like_pattern_with_sort() throws IOException { + String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_sort.ppl")); + timing(summary, "script_engine_like_pattern_with_sort", ppl); + } + /** Tests deduplication by metrics.size field with sorting by timestamp. 
*/ @Test public void dedup_metrics_size_field() throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java index 9710d2f4415..6e9b0c7b6d7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java @@ -738,7 +738,11 @@ public void testCountBySpanForCustomFormats() throws IOException { public void testSpanByImplicitTimestamp() throws IOException { JSONObject result = executeQuery("source=big5 | stats count() by span(1d) as span"); verifySchema(result, schema("count()", "bigint"), schema("span", "timestamp")); - verifyDataRows(result, rows(1, "2023-01-02 00:00:00")); + verifyDataRows( + result, + rows(1, "2023-01-02 00:00:00"), + rows(1, "2023-03-01 00:00:00"), + rows(1, "2023-05-01 00:00:00")); Throwable t = assertThrowsWithReplace( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java index f7c81e797df..ad132f3eb7e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java @@ -96,7 +96,7 @@ public void testIsNotNull() throws IOException { public void testIsNotNullWithStruct() throws IOException { JSONObject actual = executeQuery("source=big5 | where isnotnull(aws) | fields aws"); verifySchema(actual, schema("aws", "struct")); - verifyNumOfRows(actual, 1); + verifyNumOfRows(actual, 3); } @Test diff --git a/integ-test/src/test/resources/big5/data/big5.json b/integ-test/src/test/resources/big5/data/big5.json index 30a6a81ab8e..c9a0dc07a14 100644 --- 
a/integ-test/src/test/resources/big5/data/big5.json +++ b/integ-test/src/test/resources/big5/data/big5.json @@ -1,2 +1,6 @@ {"index":{}} {"message":"2023-04-30T21:48:56.160Z Apr 30 21:48:56 ip-66-221-134-40 journal: donkey glazer fly shark whip servant thornfalcon","process":{"name":"journal"},"aws.cloudwatch":{"ingestion_time":"2023-04-30T21:48:56.160Z","log_group":"/var/log/messages","log_stream":"luckcrafter"},"tags":["preserve_original_event"],"meta":{"file":"2023-01-02/1682891301-gotext.ndjson.gz"},"cloud":{"region":"eu-central-1"},"@timestamp":"2023-01-02T22:02:34.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"tmin":849,"size":1981},"log.file.path":"/var/log/messages/luckcrafter","event":{"id":"sunsetmark","dataset":"generic","ingested":"2023-07-20T03:36:30.223806Z"},"agent":{"id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","name":"fancydancer","ephemeral_id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","type":"filebeat","version":"8.8.0"}} +{"index":{}} +{"message":"2024-04-11T18:00:10.965Z Apr 11 18:00:10 ip-32-11-43-93 sshd: cloak bolt thorn hugger rib jackal wolverine shaker boar fighter taker boulderfox","process":{"name":"sshd"},"aws.cloudwatch":{"log_stream":"mirrorlighter","ingestion_time":"2024-04-11T18:00:10.965Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712851210-sshd.ndjson.gz"},"cloud":{"region":"ap-southeast-3"},"@timestamp":"2023-05-01T21:59:58.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3166,"tmin":1},"log.file.path":"/var/log/messages/mirrorlighter","event":{"id":"patternantler","ingested":"2024-04-11T17:39:10.965818973Z","dataset":"generic"},"agent":{"id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5","name":"brindlehugger","type":"filebeat","version":"8.8.0","ephemeral_id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5"}} +{"index":{}} +{"message":"2024-04-11T10:15:01.628Z Apr 11 10:15:01 ip-95-21-51-112 kernel: kicker stinger slave dolphin 
sparkox","process":{"name":"kernel"},"aws.cloudwatch":{"log_stream":"plumebard","ingestion_time":"2024-04-11T10:15:01.628Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712826901-kernel.ndjson.gz"},"cloud":{"region":"ap-south-1"},"@timestamp":"2023-03-01T22:31:11.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3993,"tmin":1},"log.file.path":"/var/log/messages/plumebard","event":{"id":"chipgambler","ingested":"2024-04-11T10:09:29.628941177Z","dataset":"generic"},"agent":{"id":"5f25fa16-6a99-489f-b1c5-f27c0627a459","name":"lemongrabber","type":"filebeat","version":"8.8.0","ephemeral_id":"5f25fa16-6a99-489f-b1c5-f27c0627a459"}} diff --git a/integ-test/src/test/resources/big5/queries/rex_regex_transformation.ppl b/integ-test/src/test/resources/big5/queries/rex_regex_transformation.ppl new file mode 100644 index 00000000000..20671d9f6f8 --- /dev/null +++ b/integ-test/src/test/resources/big5/queries/rex_regex_transformation.ppl @@ -0,0 +1,30 @@ +/* Extract log type and filename from file paths, calculate filename length, and sort by timestamp */ +/* +{ + "name": "rex_regex_transformation", + "operation-type": "search", + "index": "{{index_name | default('big5')}}", + "body": { + "query": { + "match_all": {} + }, + "_source": { + "includes": ["log.file.path", "@timestamp"], + "excludes": [] + }, + "sort": [ + { + "@timestamp": { + "order": "desc", + "missing": "_last" + } + } + ] + } +} +*/ +source = big5 +| rex field=log.file.path '/var/log/(?<logType>\\w+)/(?<filename>\\w+)' +| eval filename_len = length(filename) +| fields log.file.path, logType, filename, filename_len, @timestamp +| sort - @timestamp \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_aggregation.ppl b/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_aggregation.ppl new file mode 100644 index 00000000000..a8b3982bb70 --- /dev/null +++ 
b/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_aggregation.ppl @@ -0,0 +1,48 @@ +/* Filter messages containing 'sshd' and aggregate count by metrics.size */ +/* +{ + "name": "script_engine_like_pattern_with_aggregation", + "operation-type": "search", + "index": "{{index_name | default('custom-big5')}}", + "body": { + "query": { + "script": { + "script": { + "source": "{\"langType\":\"calcite\",\"script\":\"...\"}", + "lang": "opensearch_compounded_script", + "params": { + "utcTimestamp": "{{current_timestamp}}" + } + }, + "boost": 1.0 + } + }, + "_source": { + "includes": ["message", "metrics.size"], + "excludes": [] + }, + "aggregations": { + "composite_buckets": { + "composite": { + "size": 10000, + "sources": [ + { + "metrics.size": { + "terms": { + "field": "metrics.size", + "missing_bucket": true, + "missing_order": "first", + "order": "asc" + } + } + } + ] + } + } + } + } +} +*/ +source = big5 +| where like(`message`, '%sshd%') +| stats count() by metrics.size \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_sort.ppl b/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_sort.ppl new file mode 100644 index 00000000000..f69536b95d3 --- /dev/null +++ b/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_sort.ppl @@ -0,0 +1,39 @@ +/* Filter messages containing 'sshd', sort by timestamp, and return top 10 results */ +/* +{ + "name": "script_engine_like_pattern_with_sort", + "operation-type": "search", + "index": "{{index_name | default('big5')}}", + "body": { + "query": { + "script": { + "script": { + "source": "{\"langType\":\"calcite\",\"script\":\"...\"}", + "lang": "opensearch_compounded_script", + "params": { + "utcTimestamp": "{{current_timestamp}}" + } + }, + "boost": 1.0 + } + }, + "size": 10, + "_source": { + "includes": ["agent", "process", "log", "message", "tags", "cloud", "input", "@timestamp", "ecs", 
"data_stream", "meta", "host", "metrics", "aws", "event"], + "excludes": [] + }, + "sort": [ + { + "@timestamp": { + "order": "desc", + "missing": "_last" + } + } + ] + } +} +*/ +source = big5 +| where like(`message`, '%sshd%') +| sort - @timestamp +| head 10 \ No newline at end of file