From 042cdd168a6c8d7d31824a890eb5bd1206617a5f Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 6 May 2024 22:21:29 +0800 Subject: [PATCH 1/2] make spark-profiler publish snapshot --- .github/workflows/maven_test.yml | 10 +++++----- connector/profiler/README.md | 2 +- connector/profiler/pom.xml | 2 +- dev/create-release/release-build.sh | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 1 + dev/test-dependencies.sh | 2 +- docs/building-spark.md | 4 ++++ 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml index 38c6221247f9..d23cea926a27 100644 --- a/.github/workflows/maven_test.yml +++ b/.github/workflows/maven_test.yml @@ -190,18 +190,18 @@ jobs: export ENABLE_KINESIS_TESTS=0 # Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10 export TEST_MODULES=`echo "$MODULES_TO_TEST" | sed -e "s%#%/%g"` - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install if [[ "$INCLUDED_TAGS" != "" ]]; then - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae elif [[ "$MODULES_TO_TEST" == "connect" ]]; then ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" 
-Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae elif [[ "$EXCLUDED_TAGS" != "" ]]; then - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then # To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae else - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae fi - name: Clean up local Maven repository run: | diff --git a/connector/profiler/README.md b/connector/profiler/README.md index 527f8b487d4d..d928a47cab7d 100644 --- a/connector/profiler/README.md +++ b/connector/profiler/README.md @@ -23,7 +23,7 @@ Code profiling is currently only supported for To get maximum 
profiling information set the following jvm options for the executor : ``` - -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+PreserveFramePointer +spark.executor.extraJavaOptions=-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+PreserveFramePointer ``` For more information on async_profiler see the [Async Profiler Manual](https://krzysztofslusarski.github.io/2022/12/12/async-manual.html) diff --git a/connector/profiler/pom.xml b/connector/profiler/pom.xml index 933a74edc0a9..14e5a73e31f1 100644 --- a/connector/profiler/pom.xml +++ b/connector/profiler/pom.xml @@ -44,7 +44,7 @@ me.bechberger ap-loader-all - 3.0-8 + 3.0-9 diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 62d172ef74ca..75ec98464f3e 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -201,7 +201,7 @@ SCALA_2_12_PROFILES="-Pscala-2.12" HIVE_PROFILES="-Phive -Phive-thriftserver" # Profiles for publishing snapshots and release to Maven Central # We use Apache Hive 2.3 for publishing -PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud" +PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud -Pjvm-profiler" # Profiles for building binary releases BASE_RELEASE_PROFILES="$BASE_PROFILES -Psparkr" diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 5d933e34e40b..2b153d21fcb6 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -14,6 +14,7 @@ annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.13.1//antlr4-runtime-4.13.1.jar aopalliance-repackaged/3.0.3//aopalliance-repackaged-3.0.3.jar +ap-loader-all/3.0-9//ap-loader-all-3.0-9.jar arpack/3.0.3//arpack-3.0.3.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar arrow-format/16.0.0//arrow-format-16.0.0.jar diff --git 
a/dev/test-dependencies.sh b/dev/test-dependencies.sh index 175f59a70094..048c59f4cec9 100755 --- a/dev/test-dependencies.sh +++ b/dev/test-dependencies.sh @@ -31,7 +31,7 @@ export LC_ALL=C # NOTE: These should match those in the release publishing script, and be kept in sync with # dev/create-release/release-build.sh HADOOP_MODULE_PROFILES="-Phive-thriftserver -Pkubernetes -Pyarn -Phive \ - -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud" + -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud -Pjvm-profiler" MVN="build/mvn" HADOOP_HIVE_PROFILES=( hadoop-3-hive-2.3 diff --git a/docs/building-spark.md b/docs/building-spark.md index d10dfc9434fe..e71d6727080a 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -117,6 +117,10 @@ where `spark-streaming_{{site.SCALA_BINARY_VERSION}}` is the `artifactId` as def ./build/mvn -Pconnect -DskipTests clean package +## Building with JVM Profile support + + ./build/mvn -Pjvm-profiler -DskipTests clean package + ## Continuous Compilation We use the scala-maven-plugin which supports incremental and continuous compilation. E.g. From 2a0589bbcb510a54810d287f1fad8bcf9f9693d8 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 7 May 2024 14:20:42 +0800 Subject: [PATCH 2/2] update --- connector/profiler/pom.xml | 6 +++++- dev/deps/spark-deps-hadoop-3-hive-2.3 | 1 - docs/building-spark.md | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/connector/profiler/pom.xml b/connector/profiler/pom.xml index 14e5a73e31f1..7746a6934632 100644 --- a/connector/profiler/pom.xml +++ b/connector/profiler/pom.xml @@ -31,6 +31,9 @@ jar Spark Profiler + + Enables code profiling of executors based on the async profiler. 
+ https://spark.apache.org/ @@ -44,7 +47,8 @@ me.bechberger ap-loader-all - 3.0-9 + 3.0-8 + provided diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 2b153d21fcb6..5d933e34e40b 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -14,7 +14,6 @@ annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.13.1//antlr4-runtime-4.13.1.jar aopalliance-repackaged/3.0.3//aopalliance-repackaged-3.0.3.jar -ap-loader-all/3.0-9//ap-loader-all-3.0-9.jar arpack/3.0.3//arpack-3.0.3.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar arrow-format/16.0.0//arrow-format-16.0.0.jar diff --git a/docs/building-spark.md b/docs/building-spark.md index e71d6727080a..73fc31610d95 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -121,6 +121,9 @@ where `spark-streaming_{{site.SCALA_BINARY_VERSION}}` is the `artifactId` as def ./build/mvn -Pjvm-profiler -DskipTests clean package +**Note:** The `jvm-profiler` profile builds the assembly without including the dependency `ap-loader`; +you can download it manually from the Maven Central repository and use it together with `spark-profiler_{{site.SCALA_BINARY_VERSION}}`. + ## Continuous Compilation We use the scala-maven-plugin which supports incremental and continuous compilation. E.g.