-
Notifications
You must be signed in to change notification settings - Fork 180
Add Frequently Used Big5 PPL Queries #4976
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4c30c61
1297fa2
1f006db
5129270
48d316a
c49e7e4
d052ba3
08477f7
1b1203a
237478a
cf4b1e0
9cc6f75
edef39c
fdc7e8e
8dbb518
0bf2fb8
d6079b8
3ed2ba6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -738,7 +738,11 @@ public void testCountBySpanForCustomFormats() throws IOException { | |
| public void testSpanByImplicitTimestamp() throws IOException { | ||
| JSONObject result = executeQuery("source=big5 | stats count() by span(1d) as span"); | ||
| verifySchema(result, schema("count()", "bigint"), schema("span", "timestamp")); | ||
| verifyDataRows(result, rows(1, "2023-01-02 00:00:00")); | ||
| verifyDataRows( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This PR is only to add more test queries right? Why the behavior changed?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The behavior changed because the file |
||
| result, | ||
| rows(1, "2023-01-02 00:00:00"), | ||
| rows(1, "2023-03-01 00:00:00"), | ||
| rows(1, "2023-05-01 00:00:00")); | ||
|
|
||
| Throwable t = | ||
| assertThrowsWithReplace( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,6 @@ | ||
| {"index":{}} | ||
| {"message":"2023-04-30T21:48:56.160Z Apr 30 21:48:56 ip-66-221-134-40 journal: donkey glazer fly shark whip servant thornfalcon","process":{"name":"journal"},"aws.cloudwatch":{"ingestion_time":"2023-04-30T21:48:56.160Z","log_group":"/var/log/messages","log_stream":"luckcrafter"},"tags":["preserve_original_event"],"meta":{"file":"2023-01-02/1682891301-gotext.ndjson.gz"},"cloud":{"region":"eu-central-1"},"@timestamp":"2023-01-02T22:02:34.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"tmin":849,"size":1981},"log.file.path":"/var/log/messages/luckcrafter","event":{"id":"sunsetmark","dataset":"generic","ingested":"2023-07-20T03:36:30.223806Z"},"agent":{"id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","name":"fancydancer","ephemeral_id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","type":"filebeat","version":"8.8.0"}} | ||
| {"index":{}} | ||
| {"message":"2024-04-11T18:00:10.965Z Apr 11 18:00:10 ip-32-11-43-93 sshd: cloak bolt thorn hugger rib jackal wolverine shaker boar fighter taker boulderfox","process":{"name":"sshd"},"aws.cloudwatch":{"log_stream":"mirrorlighter","ingestion_time":"2024-04-11T18:00:10.965Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712851210-sshd.ndjson.gz"},"cloud":{"region":"ap-southeast-3"},"@timestamp":"2023-05-01T21:59:58.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3166,"tmin":1},"log.file.path":"/var/log/messages/mirrorlighter","event":{"id":"patternantler","ingested":"2024-04-11T17:39:10.965818973Z","dataset":"generic"},"agent":{"id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5","name":"brindlehugger","type":"filebeat","version":"8.8.0","ephemeral_id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5"}} | ||
| {"index":{}} | ||
| {"message":"2024-04-11T10:15:01.628Z Apr 11 10:15:01 ip-95-21-51-112 kernel: kicker stinger slave dolphin sparkox","process":{"name":"kernel"},"aws.cloudwatch":{"log_stream":"plumebard","ingestion_time":"2024-04-11T10:15:01.628Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712826901-kernel.ndjson.gz"},"cloud":{"region":"ap-south-1"},"@timestamp":"2023-03-01T22:31:11.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3993,"tmin":1},"log.file.path":"/var/log/messages/plumebard","event":{"id":"chipgambler","ingested":"2024-04-11T10:09:29.628941177Z","dataset":"generic"},"agent":{"id":"5f25fa16-6a99-489f-b1c5-f27c0627a459","name":"lemongrabber","type":"filebeat","version":"8.8.0","ephemeral_id":"5f25fa16-6a99-489f-b1c5-f27c0627a459"}} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| /* Extract log type and filename from file paths, calculate filename length, and sort by timestamp */ | ||
| /* | ||
| { | ||
| "name": "rex_regex_transformation", | ||
| "operation-type": "search", | ||
| "index": "{{index_name | default('big5')}}", | ||
| "body": { | ||
| "query": { | ||
| "match_all": {} | ||
| }, | ||
| "_source": { | ||
| "includes": ["log.file.path", "@timestamp"], | ||
| "excludes": [] | ||
| }, | ||
| "sort": [ | ||
| { | ||
| "@timestamp": { | ||
| "order": "desc", | ||
| "missing": "_last" | ||
| } | ||
| } | ||
| ] | ||
| } | ||
| } | ||
| */ | ||
aalva500-prog marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| source = big5 | ||
| | rex field=log.file.path '/var/log/(?<logType>\\w+)/(?<filename>\\w+)' | ||
| | eval filename_len = length(filename) | ||
| | fields log.file.path, logType, filename, filename_len, @timestamp | ||
| | sort - @timestamp | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| /* Filter messages containing 'sshd' and aggregate count by metrics.size */ | ||
| /* | ||
| { | ||
| "name": "script_engine_like_pattern_with_aggregation", | ||
| "operation-type": "search", | ||
| "index": "{{index_name | default('custom-big5')}}", | ||
| "body": { | ||
| "query": { | ||
| "script": { | ||
| "script": { | ||
| "source": "{\"langType\":\"calcite\",\"script\":\"...\"}", | ||
| "lang": "opensearch_compounded_script", | ||
| "params": { | ||
| "utcTimestamp": "{{current_timestamp}}" | ||
| } | ||
| }, | ||
| "boost": 1.0 | ||
| } | ||
| }, | ||
| "_source": { | ||
| "includes": ["message", "metrics.size"], | ||
| "excludes": [] | ||
| }, | ||
| "aggregations": { | ||
| "composite_buckets": { | ||
| "composite": { | ||
| "size": 10000, | ||
| "sources": [ | ||
| { | ||
| "metrics.size": { | ||
| "terms": { | ||
| "field": "metrics.size", | ||
| "missing_bucket": true, | ||
| "missing_order": "first", | ||
| "order": "asc" | ||
| } | ||
| } | ||
| } | ||
| ] | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| */ | ||
| source = big5 | ||
| | where like(`message`, '%sshd%') | ||
| | stats count() by metrics.size |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| /* Filter messages containing 'sshd', sort by timestamp, and return top 10 results */ | ||
| /* | ||
| { | ||
| "name": "script_engine_like_pattern_with_sort", | ||
| "operation-type": "search", | ||
| "index": "{{index_name | default('big5')}}", | ||
| "body": { | ||
| "query": { | ||
| "script": { | ||
| "script": { | ||
| "source": "{\"langType\":\"calcite\",\"script\":\"...\"}", | ||
| "lang": "opensearch_compounded_script", | ||
| "params": { | ||
| "utcTimestamp": "{{current_timestamp}}" | ||
| } | ||
| }, | ||
| "boost": 1.0 | ||
| } | ||
| }, | ||
| "size": 10, | ||
| "_source": { | ||
| "includes": ["agent", "process", "log", "message", "tags", "cloud", "input", "@timestamp", "ecs", "data_stream", "meta", "host", "metrics", "aws", "event"], | ||
| "excludes": [] | ||
| }, | ||
| "sort": [ | ||
| { | ||
| "@timestamp": { | ||
| "order": "desc", | ||
| "missing": "_last" | ||
| } | ||
| } | ||
| ] | ||
| } | ||
| } | ||
| */ | ||
| source = big5 | ||
| | where like(`message`, '%sshd%') | ||
| | sort - @timestamp | ||
| | head 10 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| calcite: | ||
| logical: | | ||
| LogicalSystemLimit(sort0=[$4], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) | ||
| LogicalSort(sort0=[$4], dir0=[DESC-nulls-last]) | ||
| LogicalProject(log.file.path=[$10], logType=[REX_EXTRACT($10, '/var/log/(?<logType>\w+)/(?<filename>\w+)', 'logType')], filename=[REX_EXTRACT($10, '/var/log/(?<logType>\w+)/(?<filename>\w+)', 'filename')], filename_len=[CHAR_LENGTH(REX_EXTRACT($10, '/var/log/(?<logType>\w+)/(?<filename>\w+)', 'filename'))], @timestamp=[$17]) | ||
| CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) | ||
| physical: | | ||
| EnumerableCalc(expr#0..1=[{inputs}], expr#2=['/var/log/(?<logType>\w+)/(?<filename>\w+)'], expr#3=['logType'], expr#4=[REX_EXTRACT($t0, $t2, $t3)], expr#5=['filename'], expr#6=[REX_EXTRACT($t0, $t2, $t5)], expr#7=[CHAR_LENGTH($t6)], log.file.path=[$t0], $f1=[$t4], $f2=[$t6], $f3=[$t7], @timestamp=[$t1]) | ||
| CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[log.file.path, @timestamp], SORT->[{ | ||
| "@timestamp" : { | ||
| "order" : "desc", | ||
| "missing" : "_last" | ||
| } | ||
| }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["log.file.path","@timestamp"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any reason to do correctness check for this one? I thought this IT is only for benchmark?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR is to continue the work done here by @noCharger: #4816. I don't have the whole context, unfortunately. However, I think it is not only for benchmark, as the same was done for the
dedupcommand in this PR: #4991