Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,36 @@ public void coalesce_nonexistent_field_fallback() throws IOException {
timing(summary, "coalesce_nonexistent_field_fallback", ppl);
}

/**
* Tests regex-based field extraction and transformation using rex command. Validates that the
* Calcite plan correctly handles regex patterns.
*/
@Test
public void rex_regex_transformation() throws IOException {
String ppl = sanitize(loadExpectedQuery("rex_regex_transformation.ppl"));
timing(summary, "rex_regex_transformation", ppl);
}

/**
* Tests LIKE pattern matching with aggregation using script engine. Validates filtering by
* message content and grouping results.
*/
@Test
public void script_engine_like_pattern_with_aggregation() throws IOException {
String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_aggregation.ppl"));
timing(summary, "script_engine_like_pattern_with_aggregation", ppl);
}

/**
* Tests LIKE pattern matching with sorting and result limiting. Validates filtering by message
* content with timestamp ordering.
*/
@Test
public void script_engine_like_pattern_with_sort() throws IOException {
String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_sort.ppl"));
timing(summary, "script_engine_like_pattern_with_sort", ppl);
}

/** Tests deduplication by metrics.size field with sorting by timestamp. */
@Test
public void dedup_metrics_size_field() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,11 @@ public void testCountBySpanForCustomFormats() throws IOException {
public void testSpanByImplicitTimestamp() throws IOException {
JSONObject result = executeQuery("source=big5 | stats count() by span(1d) as span");
verifySchema(result, schema("count()", "bigint"), schema("span", "timestamp"));
verifyDataRows(result, rows(1, "2023-01-02 00:00:00"));
verifyDataRows(
result,
rows(1, "2023-01-02 00:00:00"),
rows(1, "2023-03-01 00:00:00"),
rows(1, "2023-05-01 00:00:00"));

Throwable t =
assertThrowsWithReplace(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public void testIsNotNull() throws IOException {
public void testIsNotNullWithStruct() throws IOException {
JSONObject actual = executeQuery("source=big5 | where isnotnull(aws) | fields aws");
verifySchema(actual, schema("aws", "struct"));
verifyNumOfRows(actual, 1);
verifyNumOfRows(actual, 3);
}

@Test
Expand Down
4 changes: 4 additions & 0 deletions integ-test/src/test/resources/big5/data/big5.json
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
{"index":{}}
{"message":"2023-04-30T21:48:56.160Z Apr 30 21:48:56 ip-66-221-134-40 journal: donkey glazer fly shark whip servant thornfalcon","process":{"name":"journal"},"aws.cloudwatch":{"ingestion_time":"2023-04-30T21:48:56.160Z","log_group":"/var/log/messages","log_stream":"luckcrafter"},"tags":["preserve_original_event"],"meta":{"file":"2023-01-02/1682891301-gotext.ndjson.gz"},"cloud":{"region":"eu-central-1"},"@timestamp":"2023-01-02T22:02:34.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"tmin":849,"size":1981},"log.file.path":"/var/log/messages/luckcrafter","event":{"id":"sunsetmark","dataset":"generic","ingested":"2023-07-20T03:36:30.223806Z"},"agent":{"id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","name":"fancydancer","ephemeral_id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","type":"filebeat","version":"8.8.0"}}
{"index":{}}
{"message":"2024-04-11T18:00:10.965Z Apr 11 18:00:10 ip-32-11-43-93 sshd: cloak bolt thorn hugger rib jackal wolverine shaker boar fighter taker boulderfox","process":{"name":"sshd"},"aws.cloudwatch":{"log_stream":"mirrorlighter","ingestion_time":"2024-04-11T18:00:10.965Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712851210-sshd.ndjson.gz"},"cloud":{"region":"ap-southeast-3"},"@timestamp":"2023-05-01T21:59:58.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3166,"tmin":1},"log.file.path":"/var/log/messages/mirrorlighter","event":{"id":"patternantler","ingested":"2024-04-11T17:39:10.965818973Z","dataset":"generic"},"agent":{"id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5","name":"brindlehugger","type":"filebeat","version":"8.8.0","ephemeral_id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5"}}
{"index":{}}
{"message":"2024-04-11T10:15:01.628Z Apr 11 10:15:01 ip-95-21-51-112 kernel: kicker stinger slave dolphin sparkox","process":{"name":"kernel"},"aws.cloudwatch":{"log_stream":"plumebard","ingestion_time":"2024-04-11T10:15:01.628Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712826901-kernel.ndjson.gz"},"cloud":{"region":"ap-south-1"},"@timestamp":"2023-03-01T22:31:11.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3993,"tmin":1},"log.file.path":"/var/log/messages/plumebard","event":{"id":"chipgambler","ingested":"2024-04-11T10:09:29.628941177Z","dataset":"generic"},"agent":{"id":"5f25fa16-6a99-489f-b1c5-f27c0627a459","name":"lemongrabber","type":"filebeat","version":"8.8.0","ephemeral_id":"5f25fa16-6a99-489f-b1c5-f27c0627a459"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* Extract log type and filename from file paths, calculate filename length, and sort by timestamp */
/*
{
"name": "rex_regex_transformation",
"operation-type": "search",
"index": "{{index_name | default('big5')}}",
"body": {
"query": {
"match_all": {}
},
"_source": {
"includes": ["log.file.path", "@timestamp"],
"excludes": []
},
"sort": [
{
"@timestamp": {
"order": "desc",
"missing": "_last"
}
}
]
}
}
*/
source = big5
| rex field=log.file.path '/var/log/(?<logType>\\w+)/(?<filename>\\w+)'
| eval filename_len = length(filename)
| fields log.file.path, logType, filename, filename_len, @timestamp
| sort - @timestamp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/* Filter messages containing 'sshd' and aggregate count by metrics.size */
/*
{
"name": "script_engine_like_pattern_with_aggregation",
"operation-type": "search",
"index": "{{index_name | default('custom-big5')}}",
"body": {
"query": {
"script": {
"script": {
"source": "{\"langType\":\"calcite\",\"script\":\"...\"}",
"lang": "opensearch_compounded_script",
"params": {
"utcTimestamp": "{{current_timestamp}}"
}
},
"boost": 1.0
}
},
"_source": {
"includes": ["message", "metrics.size"],
"excludes": []
},
"aggregations": {
"composite_buckets": {
"composite": {
"size": 10000,
"sources": [
{
"metrics.size": {
"terms": {
"field": "metrics.size",
"missing_bucket": true,
"missing_order": "first",
"order": "asc"
}
}
}
]
}
}
}
}
}
*/
source = big5
| where like(`message`, '%sshd%')
| stats count() by metrics.size
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/* Filter messages containing 'sshd', sort by timestamp, and return top 10 results */
/*
{
"name": "script_engine_like_pattern_with_sort",
"operation-type": "search",
"index": "{{index_name | default('big5')}}",
"body": {
"query": {
"script": {
"script": {
"source": "{\"langType\":\"calcite\",\"script\":\"...\"}",
"lang": "opensearch_compounded_script",
"params": {
"utcTimestamp": "{{current_timestamp}}"
}
},
"boost": 1.0
}
},
"size": 10,
"_source": {
"includes": ["agent", "process", "log", "message", "tags", "cloud", "input", "@timestamp", "ecs", "data_stream", "meta", "host", "metrics", "aws", "event"],
"excludes": []
},
"sort": [
{
"@timestamp": {
"order": "desc",
"missing": "_last"
}
}
]
}
}
*/
source = big5
| where like(`message`, '%sshd%')
| sort - @timestamp
| head 10
Loading