Skip to content

Commit 47f7cf7

Browse files
authored
[apache_spark][executor] Add Apache Spark package with Executor data stream (#2943)
* Add executors data stream for Apache Spark * Update as per review comments * Add system test case and address review comments * Update README.md * Update the Docker image reference to a specific SHA * Change naming convention of datastream
1 parent a2e1a3a commit 47f7cf7

File tree

11 files changed

+1045
-116
lines changed

11 files changed

+1045
-116
lines changed

packages/apache_spark/_dev/build/docs/README.md

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,6 @@ This is the `application` data stream.
7070
{{event "application"}}
7171

7272
{{fields "application"}}
73-
### Nodes
74-
75-
This is the `nodes` data stream.
76-
77-
{{event "nodes"}}
78-
79-
{{fields "nodes"}}
8073

8174
### Driver
8275

@@ -85,3 +78,19 @@ This is the `driver` data stream.
8578
{{event "driver"}}
8679

8780
{{fields "driver"}}
81+
82+
### Executor
83+
84+
This is the `executor` data stream.
85+
86+
{{event "executor"}}
87+
88+
{{fields "executor"}}
89+
90+
### Nodes
91+
92+
This is the `nodes` data stream.
93+
94+
{{event "nodes"}}
95+
96+
{{fields "nodes"}}

packages/apache_spark/changelog.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
- version: "0.1.0"
44
changes:
5+
- description: Implement "executor" data stream
6+
type: enhancement
7+
link: https://github.com/elastic/integrations/pull/2943
58
- description: Implement "driver" data stream
69
type: enhancement
710
link: https://github.com/elastic/integrations/pull/2945
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
vars: ~
2+
data_stream:
3+
vars:
4+
hosts:
5+
- http://apache-spark-main:{{Ports.[2]}}
6+
path:
7+
- /jolokia/?ignoreErrors=true&canonicalNaming=false
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
metricsets: ["jmx"]
2+
namespace: "metrics"
3+
hosts:
4+
{{#each hosts}}
5+
- {{this}}
6+
{{/each}}
7+
path: {{path}}
8+
period: {{period}}
9+
jmx.mappings:
10+
- mbean: 'metrics:name=*.*.executor.bytesRead,type=counters'
11+
attributes:
12+
- attr: Count
13+
field: executor.bytes.read
14+
- mbean: 'metrics:name=*.*.executor.bytesWritten,type=counters'
15+
attributes:
16+
- attr: Count
17+
field: executor.bytes.written
18+
- mbean: 'metrics:name=*.*.ExecutorMetrics.DirectPoolMemory,type=gauges'
19+
attributes:
20+
- attr: Value
21+
field: executor.memory.direct_pool
22+
- mbean: 'metrics:name=*.*.executor.diskBytesSpilled,type=counters'
23+
attributes:
24+
- attr: Count
25+
field: executor.disk_bytes_spilled
26+
- mbean: 'metrics:name=*.*.HiveExternalCatalog.fileCacheHits,type=counters'
27+
attributes:
28+
- attr: Count
29+
field: executor.file_cache_hits
30+
- mbean: 'metrics:name=*.*.HiveExternalCatalog.filesDiscovered,type=counters'
31+
attributes:
32+
- attr: Count
33+
field: executor.files_discovered
34+
- mbean: 'metrics:name=*.*.executor.filesystem.file.largeRead_ops,type=gauges'
35+
attributes:
36+
- attr: Value
37+
field: executor.filesystem.file.large_read_ops
38+
- mbean: 'metrics:name=*.*.executor.filesystem.file.read_bytes,type=gauges'
39+
attributes:
40+
- attr: Value
41+
field: executor.filesystem.file.read_bytes
42+
- mbean: 'metrics:name=*.*.executor.filesystem.file.read_ops,type=gauges'
43+
attributes:
44+
- attr: Value
45+
field: executor.filesystem.file.read_ops
46+
- mbean: 'metrics:name=*.*.executor.filesystem.file.write_bytes,type=gauges'
47+
attributes:
48+
- attr: Value
49+
field: executor.filesystem.file.write_bytes
50+
- mbean: 'metrics:name=*.*.executor.filesystem.file.write_ops,type=gauges'
51+
attributes:
52+
- attr: Value
53+
field: executor.filesystem.file.write_ops
54+
- mbean: 'metrics:name=*.*.executor.filesystem.hdfs.largeRead_ops,type=gauges'
55+
attributes:
56+
- attr: Value
57+
field: executor.filesystem.hdfs.large_read_ops
58+
- mbean: 'metrics:name=*.*.executor.filesystem.hdfs.read_bytes,type=gauges'
59+
attributes:
60+
- attr: Value
61+
field: executor.filesystem.hdfs.read_bytes
62+
- mbean: 'metrics:name=*.*.executor.filesystem.hdfs.read_ops,type=gauges'
63+
attributes:
64+
- attr: Value
65+
field: executor.filesystem.hdfs.read_ops
66+
- mbean: 'metrics:name=*.*.executor.filesystem.hdfs.write_bytes,type=gauges'
67+
attributes:
68+
- attr: Value
69+
field: executor.filesystem.hdfs.write_bytes
70+
- mbean: 'metrics:name=*.*.executor.filesystem.hdfs.write_ops,type=gauges'
71+
attributes:
72+
- attr: Value
73+
field: executor.filesystem.hdfs.write_ops
74+
- mbean: 'metrics:name=*.*.HiveExternalCatalog.hiveClientCalls,type=counters'
75+
attributes:
76+
- attr: Count
77+
field: executor.hive_client_calls
78+
- mbean: 'metrics:name=*.*.JVMCPU.jvmCpuTime,type=gauges'
79+
attributes:
80+
- attr: Value
81+
field: executor.jvm.cpu_time
82+
- mbean: 'metrics:name=*.*.executor.jvmGCTime,type=counters'
83+
attributes:
84+
- attr: Count
85+
field: executor.jvm.gc_time
86+
- mbean: 'metrics:name=*.*.ExecutorMetrics.JVMHeapMemory,type=gauges'
87+
attributes:
88+
- attr: Value
89+
field: executor.memory.jvm.heap
90+
- mbean: 'metrics:name=*.*.ExecutorMetrics.JVMOffHeapMemory,type=gauges'
91+
attributes:
92+
- attr: Value
93+
field: executor.memory.jvm.off_heap
94+
- mbean: 'metrics:name=*.*.ExecutorMetrics.MajorGCCount,type=gauges'
95+
attributes:
96+
- attr: Value
97+
field: executor.gc.major.count
98+
- mbean: 'metrics:name=*.*.ExecutorMetrics.MajorGCTime,type=gauges'
99+
attributes:
100+
- attr: Value
101+
field: executor.gc.major.time
102+
- mbean: 'metrics:name=*.*.ExecutorMetrics.MappedPoolMemory,type=gauges'
103+
attributes:
104+
- attr: Value
105+
field: executor.memory.mapped_pool
106+
- mbean: 'metrics:name=*.*.executor.memoryBytesSpilled,type=counters'
107+
attributes:
108+
- attr: Count
109+
field: executor.memory_bytes_spilled
110+
- mbean: 'metrics:name=*.*.ExecutorMetrics.MinorGCCount,type=gauges'
111+
attributes:
112+
- attr: Value
113+
field: executor.gc.minor.count
114+
- mbean: 'metrics:name=*.*.ExecutorMetrics.MinorGCTime,type=gauges'
115+
attributes:
116+
- attr: Value
117+
field: executor.gc.minor.time
118+
- mbean: 'metrics:name=*.*.ExecutorMetrics.OffHeapExecutionMemory,type=gauges'
119+
attributes:
120+
- attr: Value
121+
field: executor.heap_memory.off.execution
122+
- mbean: 'metrics:name=*.*.ExecutorMetrics.OffHeapStorageMemory,type=gauges'
123+
attributes:
124+
- attr: Value
125+
field: executor.heap_memory.off.storage
126+
- mbean: 'metrics:name=*.*.ExecutorMetrics.OffHeapUnifiedMemory,type=gauges'
127+
attributes:
128+
- attr: Value
129+
field: executor.heap_memory.off.unified
130+
- mbean: 'metrics:name=*.*.ExecutorMetrics.OnHeapExecutionMemory,type=gauges'
131+
attributes:
132+
- attr: Value
133+
field: executor.heap_memory.on.execution
134+
- mbean: 'metrics:name=*.*.ExecutorMetrics.OnHeapStorageMemory,type=gauges'
135+
attributes:
136+
- attr: Value
137+
field: executor.heap_memory.on.storage
138+
- mbean: 'metrics:name=*.*.ExecutorMetrics.OnHeapUnifiedMemory,type=gauges'
139+
attributes:
140+
- attr: Value
141+
field: executor.heap_memory.on.unified
142+
- mbean: 'metrics:name=*.*.HiveExternalCatalog.parallelListingJobCount,type=counters'
143+
attributes:
144+
- attr: Count
145+
field: executor.parallel_listing_job_count
146+
- mbean: 'metrics:name=*.*.HiveExternalCatalog.partitionsFetched,type=counters'
147+
attributes:
148+
- attr: Count
149+
field: executor.partitions_fetched
150+
- mbean: 'metrics:name=*.*.ExecutorMetrics.ProcessTreeJVMRSSMemory,type=gauges'
151+
attributes:
152+
- attr: Value
153+
field: executor.process_tree.jvm.rss_memory
154+
- mbean: 'metrics:name=*.*.ExecutorMetrics.ProcessTreeJVMVMemory,type=gauges'
155+
attributes:
156+
- attr: Value
157+
field: executor.process_tree.jvm.v_memory
158+
- mbean: 'metrics:name=*.*.ExecutorMetrics.ProcessTreeOtherRSSMemory,type=gauges'
159+
attributes:
160+
- attr: Value
161+
field: executor.process_tree.other.rss_memory
162+
- mbean: 'metrics:name=*.*.ExecutorMetrics.ProcessTreeOtherVMemory,type=gauges'
163+
attributes:
164+
- attr: Value
165+
field: executor.process_tree.other.v_memory
166+
- mbean: 'metrics:name=*.*.ExecutorMetrics.ProcessTreePythonRSSMemory,type=gauges'
167+
attributes:
168+
- attr: Value
169+
field: executor.process_tree.python.rss_memory
170+
- mbean: 'metrics:name=*.*.ExecutorMetrics.ProcessTreePythonVMemory,type=gauges'
171+
attributes:
172+
- attr: Value
173+
field: executor.process_tree.python.v_memory
174+
- mbean: 'metrics:name=*.*.executor.recordsRead,type=counters'
175+
attributes:
176+
- attr: Count
177+
field: executor.records.read
178+
- mbean: 'metrics:name=*.*.executor.recordsWritten,type=counters'
179+
attributes:
180+
- attr: Count
181+
field: executor.records.written
182+
- mbean: 'metrics:name=*.*.executor.resultSerializationTime,type=counters'
183+
attributes:
184+
- attr: Count
185+
field: executor.result.serialization_time
186+
- mbean: 'metrics:name=*.*.executor.resultSize,type=counters'
187+
attributes:
188+
- attr: Count
189+
field: executor.result.size
190+
- mbean: 'metrics:name=*.*.executor.runTime,type=counters'
191+
attributes:
192+
- attr: Count
193+
field: executor.run_time
194+
- mbean: 'metrics:name=*.*.ExternalShuffle.shuffle-client.usedDirectMemory,type=gauges'
195+
attributes:
196+
- attr: Value
197+
field: executor.shuffle.client.used.direct_memory
198+
- mbean: 'metrics:name=*.*.ExternalShuffle.shuffle-client.usedHeapMemory,type=gauges'
199+
attributes:
200+
- attr: Value
201+
field: executor.shuffle.client.used.heap_memory
202+
- mbean: 'metrics:name=*.*.executor.shuffleBytesWritten,type=counters'
203+
attributes:
204+
- attr: Count
205+
field: executor.shuffle.bytes_written
206+
- mbean: 'metrics:name=*.*.executor.shuffleFetchWaitTime,type=counters'
207+
attributes:
208+
- attr: Count
209+
field: executor.shuffle.fetch_wait_time
210+
- mbean: 'metrics:name=*.*.executor.shuffleLocalBlocksFetched,type=counters'
211+
attributes:
212+
- attr: Count
213+
field: executor.shuffle.local.blocks_fetched
214+
- mbean: 'metrics:name=*.*.executor.shuffleLocalBytesRead,type=counters'
215+
attributes:
216+
- attr: Count
217+
field: executor.shuffle.local.bytes_read
218+
- mbean: 'metrics:name=*.*.executor.shuffleRecordsRead,type=counters'
219+
attributes:
220+
- attr: Count
221+
field: executor.shuffle.records.read
222+
- mbean: 'metrics:name=*.*.executor.shuffleRecordsWritten,type=counters'
223+
attributes:
224+
- attr: Count
225+
field: executor.shuffle.records.written
226+
- mbean: 'metrics:name=*.*.executor.shuffleRemoteBlocksFetched,type=counters'
227+
attributes:
228+
- attr: Count
229+
field: executor.shuffle.remote.blocks_fetched
230+
- mbean: 'metrics:name=*.*.executor.shuffleRemoteBytesRead,type=counters'
231+
attributes:
232+
- attr: Count
233+
field: executor.shuffle.remote.bytes_read
234+
- mbean: 'metrics:name=*.*.executor.shuffleRemoteBytesReadToDisk,type=counters'
235+
attributes:
236+
- attr: Count
237+
field: executor.shuffle.remote.bytes_read_to_disk
238+
- mbean: 'metrics:name=*.*.executor.shuffleTotalBytesRead,type=counters'
239+
attributes:
240+
- attr: Count
241+
field: executor.shuffle.total.bytes_read
242+
- mbean: 'metrics:name=*.*.executor.shuffleWriteTime,type=counters'
243+
attributes:
244+
- attr: Count
245+
field: executor.shuffle.write.time
246+
- mbean: 'metrics:name=*.*.executor.succeededTasks,type=counters'
247+
attributes:
248+
- attr: Count
249+
field: executor.succeeded_tasks
250+
- mbean: 'metrics:name=*.*.executor.threadpool.activeTasks,type=gauges'
251+
attributes:
252+
- attr: Value
253+
field: executor.threadpool.active_tasks
254+
- mbean: 'metrics:name=*.*.executor.threadpool.completeTasks,type=gauges'
255+
attributes:
256+
- attr: Value
257+
field: executor.threadpool.complete_tasks
258+
- mbean: 'metrics:name=*.*.executor.threadpool.currentPool_size,type=gauges'
259+
attributes:
260+
- attr: Value
261+
field: executor.threadpool.current_pool_size
262+
- mbean: 'metrics:name=*.*.executor.threadpool.maxPool_size,type=gauges'
263+
attributes:
264+
- attr: Value
265+
field: executor.threadpool.max_pool_size
266+
- mbean: 'metrics:name=*.*.executor.threadpool.startedTasks,type=gauges'
267+
attributes:
268+
- attr: Value
269+
field: executor.threadpool.started_tasks
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
---
2+
description: Pipeline for parsing Apache Spark executor metrics.
3+
processors:
4+
- set:
5+
field: ecs.version
6+
value: '8.1.0'
7+
- rename:
8+
field: jolokia.metrics
9+
target_field: apache_spark
10+
ignore_missing: true
11+
- set:
12+
field: event.type
13+
value: info
14+
- set:
15+
field: event.kind
16+
value: metric
17+
- set:
18+
field: event.module
19+
value: apache_spark
20+
- script:
21+
lang: painless
22+
description: This script will add the name of application under key 'driver/executor.application_name' and executor id under 'apache_spark.executor.id'
23+
if: ctx?.apache_spark?.mbean?.contains("name=worker.") == false &&
24+
ctx?.apache_spark?.mbean?.contains("name=worker.") == false &&
25+
ctx?.apache_spark?.mbean?.contains("name=application.") == false
26+
source: >-
27+
def bean_name = ctx.apache_spark.mbean.toString().splitOnToken("=")[1];
28+
def app_name = bean_name.splitOnToken(".")[0];
29+
def executor_id = bean_name.splitOnToken(".")[1];
30+
if (executor_id == "driver") {
31+
ctx.apache_spark.driver.application_name = app_name;
32+
} else {
33+
ctx.apache_spark.executor.application_name = app_name;
34+
ctx.apache_spark.executor.id = executor_id;
35+
}
36+
- remove:
37+
field:
38+
- apache_spark.mbean
39+
- jolokia
40+
ignore_failure: true
41+
on_failure:
42+
- set:
43+
field: error.message
44+
value: '{{ _ingest.on_failure_message }}'
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- name: data_stream.dataset
2+
type: constant_keyword
3+
description: Data stream dataset.
4+
- name: data_stream.namespace
5+
type: constant_keyword
6+
description: Data stream namespace.
7+
- name: data_stream.type
8+
type: constant_keyword
9+
description: Data stream type.
10+
- name: '@timestamp'
11+
type: date
12+
description: Event timestamp.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- external: ecs
2+
name: event.kind
3+
- external: ecs
4+
name: event.type
5+
- external: ecs
6+
name: ecs.version
7+
- external: ecs
8+
name: tags
9+
- external: ecs
10+
name: service.address
11+
- external: ecs
12+
name: service.type

0 commit comments

Comments
 (0)