Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion packages/apache_spark/_dev/build/docs/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Apache Spark
# Apache Spark Integration

The Apache Spark integration collects and parses data using the Jolokia Metricbeat Module.

Expand Down Expand Up @@ -63,6 +63,13 @@ Follow the same set of steps for Spark Worker, Driver and Executor.

## Metrics

### Executors

This is the `executors` data stream.

{{event "executors"}}

{{fields "executors"}}
### Nodes

This is the `nodes` data stream.
Expand Down
15 changes: 0 additions & 15 deletions packages/apache_spark/_dev/deploy/docker/Dockerfile

This file was deleted.

51 changes: 51 additions & 0 deletions packages/apache_spark/_dev/deploy/docker/application/wordcount.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import sys
import signal
import time

from operator import add
from datetime import datetime

from pyspark.sql import SparkSession

if __name__ == "__main__":
    # Requires exactly two CLI arguments: the input text file and the Spark
    # master URL (read below as sys.argv[1] and sys.argv[2]).
    if len(sys.argv) != 3:
        # Fixed: the usage line previously omitted the mandatory <master_url>
        # argument even though the script reads sys.argv[2].
        print("Usage: wordcount <file> <master_url>", file=sys.stderr)
        sys.exit(-1)

    spark = SparkSession\
        .builder\
        .master(sys.argv[2])\
        .appName("PythonWordCount")\
        .getOrCreate()

    # Keep re-running the word count for 15 minutes so the cluster stays
    # busy long enough for metrics to be scraped during system tests.
    t_end = time.time() + 60 * 15

    while time.time() < t_end:
        # Read the file as an RDD of plain-text lines (r[0] extracts the
        # single "value" column of each Row).
        lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])
        counts = lines.flatMap(lambda x: x.split(' ')) \
                      .map(lambda x: (x, 1)) \
                      .reduceByKey(add)
        for (word, count) in counts.collect():
            print("%s: %i" % (word, count))

    spark.stop()

27 changes: 23 additions & 4 deletions packages/apache_spark/_dev/deploy/docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,28 @@
version: '2'
version: '2.3'
services:
apache_spark:
hostname: apache-spark-main
build:
context: .
dockerfile: Dockerfile
image: docker.io/bitnami/spark@sha256:cb19b1bdebc0bc9dc20ea13f2109763be6a73b357b144a01efd94902540f6d27
ports:
- 7777
- 7779
- 7780
environment:
- SPARK_MAIN_URL=spark://apache-spark-main:7077
- SPARK_WORKER_MEMORY=1024G
- SPARK_WORKER_CORES=8
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
volumes:
- ./jolokia-agent:/usr/share/java/
- ./application:/opt/bitnami/spark/examples/src/main/python/
- ./jolokia-configs:/spark/conf/
- ./docker-entrypoint/docker-entrypoint.sh:/opt/bitnami/scripts/spark/docker-entrypoint.sh
healthcheck:
interval: 1s
retries: 120
timeout: 120s
test: |-
curl -f -s http://localhost:7777/jolokia/version -o /dev/null
entrypoint: /opt/bitnami/scripts/spark/docker-entrypoint.sh /opt/bitnami/scripts/spark/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Custom entrypoint: inject the Jolokia JVM agent into every Spark component
# (master, worker, driver, executor), then run the stock Bitnami startup flow
# plus a background worker and a sample job so metrics are produced.

# Attach the Jolokia agent to the master and worker daemon JVMs.
echo 'export SPARK_MASTER_OPTS="$SPARK_MASTER_OPTS -javaagent:/usr/share/java/jolokia-agent.jar=config=/spark/conf/jolokia-master.properties"' >> "/opt/bitnami/spark/conf/spark-env.sh"
echo 'export SPARK_WORKER_OPTS="$SPARK_WORKER_OPTS -javaagent:/usr/share/java/jolokia-agent.jar=config=/spark/conf/jolokia-worker.properties"' >> "/opt/bitnami/spark/conf/spark-env.sh"

# Expose Spark's internal metrics over JMX so the Jolokia agent can serve them.
echo '*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink' >> "/opt/bitnami/spark/conf/metrics.properties"
echo '*.source.jvm.class=org.apache.spark.metrics.source.JvmSource' >> "/opt/bitnami/spark/conf/metrics.properties"

# Attach the agent to driver and executor JVMs launched via spark-submit.
echo 'spark.driver.extraJavaOptions -javaagent:/usr/share/java/jolokia-agent.jar=config=/spark/conf/jolokia-driver.properties' >> "/opt/bitnami/spark/conf/spark-defaults.conf"
echo 'spark.executor.extraJavaOptions -javaagent:/usr/share/java/jolokia-agent.jar=config=/spark/conf/jolokia-executor.properties' >> "/opt/bitnami/spark/conf/spark-defaults.conf"

set -o errexit
set -o nounset
set -o pipefail
#set -o xtrace

# Load libraries (SC1091: sourced files live in the image, not the repo).
# shellcheck disable=SC1091
. /opt/bitnami/scripts/libbitnami.sh
# shellcheck disable=SC1091
. /opt/bitnami/scripts/libspark.sh

# Load Spark environment variables
eval "$(spark_env)"

print_welcome_page

# When running as non-root, fake passwd/group entries via nss_wrapper so the
# current uid resolves to a named user inside the container.
if [ "$EUID" -ne 0 ] && [ -e "$LIBNSS_WRAPPER_PATH" ]; then
    echo "spark:x:$(id -u):$(id -g):Spark:$SPARK_HOME:/bin/false" > "$NSS_WRAPPER_PASSWD"
    echo "spark:x:$(id -g):" > "$NSS_WRAPPER_GROUP"
    echo "LD_PRELOAD=$LIBNSS_WRAPPER_PATH" >> "$SPARK_CONFDIR/spark-env.sh"
fi

if [[ "$1" = "/opt/bitnami/scripts/spark/run.sh" ]]; then
    info "** Starting Spark setup **"
    /opt/bitnami/scripts/spark/setup.sh
    info "** Spark setup finished! **"
fi

eval "$(spark_env)"

# Start a worker attached to the master, then submit the sample word-count
# job (input file doubles as the text to count); both run in the background.
# Expansions are quoted to prevent word splitting/globbing (SC2086).
cd /opt/bitnami/spark/sbin
./start-worker.sh "$SPARK_MAIN_URL" --cores "$SPARK_WORKER_CORES" --memory "$SPARK_WORKER_MEMORY" &
cd /opt/bitnami/spark/examples/src/main/python/
/opt/bitnami/spark/bin/spark-submit wordcount.py wordcount.py "$SPARK_MAIN_URL" &

echo ""
# Hand off to the requested command as PID 1.
exec "$@"
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
[Spark-Master]
stats: http://127.0.0.1:7777/jolokia/read
[Spark-Master]
stats: http://127.0.0.1:7777/jolokia/read
[Spark-Worker]
stats: http://127.0.0.1:7778/jolokia/read
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
host=0.0.0.0
port=7779
agentContext=/jolokia
backlog=100

policyLocation=file:///spark/conf/jolokia.policy
historyMaxEntries=10
debug=false
debugMaxEntries=100
maxDepth=15
maxCollectionSize=1000
maxObjects=0
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
host=0.0.0.0
port=7780
agentContext=/jolokia
backlog=100

policyLocation=file:///spark/conf/jolokia.policy
historyMaxEntries=10
debug=false
debugMaxEntries=100
maxDepth=15
maxCollectionSize=1000
maxObjects=0
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
host=0.0.0.0
port=7777
agentContext=/jolokia
backlog=100
policyLocation=file:///spark/conf/jolokia.policy
historyMaxEntries=10
debug=false
debugMaxEntries=100
maxDepth=15
maxCollectionSize=1000
maxObjects=0
host=0.0.0.0
port=7777
agentContext=/jolokia
backlog=100

policyLocation=file:///spark/conf/jolokia.policy
historyMaxEntries=10
debug=false
debugMaxEntries=100
maxDepth=15
maxCollectionSize=1000
maxObjects=0
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
host=0.0.0.0
port=7778
agentContext=/jolokia
backlog=100

policyLocation=file:///spark/conf/jolokia.policy
historyMaxEntries=10
debug=false
debugMaxEntries=100
maxDepth=15
maxCollectionSize=1000
maxObjects=0
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
<?xml version="1.0" encoding="utf-8"?>
<restrict>
<http>
<method>get</method>
<method>post</method>
</http>
<commands>
<command>read</command>
<command>list</command>
<command>search</command>
<command>version</command>
</commands>
</restrict>
<?xml version="1.0" encoding="utf-8"?>
<restrict>
<http>
<method>get</method>
<method>post</method>
</http>
<commands>
<command>read</command>
<command>list</command>
<command>search</command>
<command>version</command>
</commands>
</restrict>
3 changes: 3 additions & 0 deletions packages/apache_spark/changelog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

- version: "0.1.0"
changes:
- description: Implement "executors" data stream
type: enhancement
link: https://github.com/elastic/integrations/pull/2943
- description: Implement "nodes" data stream
type: enhancement
link: https://github.com/elastic/integrations/pull/2939
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
vars: ~
data_stream:
vars:
hosts:
- http://apache-spark-main:{{Ports.[2]}}
path:
- /jolokia/?ignoreErrors=true&canonicalNaming=false
Loading